20210202095239

-- For each (user, card) whose second non-retry review was "remembered" with no
-- earlier "forgotten" marking, find the next non-retry review of that card and
-- report how often it was "remembered", bucketed by `delta` (whole days).
--
-- NOTE(review): `delta` is taken from the sampled (second) review row, i.e. it
-- is computed from the row logged immediately before that review, not from the
-- gap preceding the review being scored. Confirm this is the intended bucketing.
WITH
  -- Users whose earliest and latest logged reviews are at least 31 days apart.
  eligibleUsers AS (
    SELECT
      userID
    FROM
      `logs.reviews`
    GROUP BY
      userID
    HAVING
      TIMESTAMP_DIFF(MAX(timestamp), MIN(timestamp), DAY) >= 31
  ),

  -- Every review row of an eligible user, annotated with per-(user, card)
  -- history computed in timestamp order.
  reviews AS (
    SELECT
      userID,
      cardID,
      timestamp,
      reviewMarking,
      isRetry,
      -- Running count of non-retry rows up to and including the current row
      -- (default RANGE frame, so rows tied on timestamp are counted together).
      COUNTIF(isRetry IS NOT TRUE) OVER perCard AS reviewNumber,
      -- Number of "forgotten" markings strictly before the current row.
      COUNTIF(reviewMarking = "forgotten") OVER (
        perCard ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING
      ) AS lapses,
      -- Timestamp of the first logged row for this user/card.
      FIRST_VALUE(timestamp) OVER perCard AS firstTimestamp,
      -- Whole days between the previous row's timestamp and the previous row's
      -- nextDueTimestamp (presumably the interval that row scheduled; verify
      -- against the logging schema). NULL for the first row of a user/card.
      TIMESTAMP_DIFF(
        LAG(nextDueTimestamp) OVER perCard,
        LAG(timestamp) OVER perCard,
        DAY
      ) AS delta
    FROM
      `logs.reviews`
    WHERE
      userID IN (
        SELECT
          userID
        FROM
          eligibleUsers
      )
    WINDOW
      perCard AS (PARTITION BY userID, cardID ORDER BY timestamp ASC)
  ),

  -- The second non-retry review of a card, when it was remembered, nothing was
  -- forgotten before it, and the card was first reviewed on/after 2020-12-12.
  samples AS (
    SELECT
      userID,
      cardID,
      timestamp,
      delta
    FROM
      reviews
    WHERE
      lapses = 0
      AND reviewNumber = 2
      AND firstTimestamp >= TIMESTAMP("2020-12-12")
      AND reviewMarking = "remembered"
      AND isRetry IS NOT TRUE
  ),

  -- For each sample, the timestamp of the earliest later non-retry review of
  -- the same card, carrying the sample's delta along.
  nextReviewID AS (
    SELECT
      l.userID,
      l.cardID,
      ANY_VALUE(l.delta) AS delta,
      MIN(r.timestamp) AS timestamp
    FROM
      samples AS l
    INNER JOIN
      `logs.reviews` AS r
    ON
      l.userID = r.userID
      AND l.cardID = r.cardID
      AND r.timestamp > l.timestamp
    WHERE
      r.isRetry IS NOT TRUE
    GROUP BY
      -- Qualified: both join sides expose userID/cardID.
      l.userID,
      l.cardID
  ),

  -- The review rows identified above, reduced to the columns the final
  -- aggregate needs.
  nextSamples AS (
    SELECT
      r.reviewMarking,
      n.delta
    FROM
      `logs.reviews` AS r
    INNER JOIN
      nextReviewID AS n
    USING
      (userID, cardID, timestamp)
    WHERE
      -- The timestamp was chosen among non-retry rows only; keep a retry row
      -- that happens to share it from being counted as well.
      r.isRetry IS NOT TRUE
  )

SELECT
  COUNT(*) AS N,
  COUNTIF(reviewMarking = "remembered") / COUNT(*) AS accuracy,
  delta
FROM
  nextSamples
GROUP BY
  delta
ORDER BY
  delta
Last updated 2023-07-13.