WITH
eligibleUsers AS (SELECT userID FROM `logs.reviews` GROUP BY userID HAVING TIMESTAMP_DIFF(MAX(timestamp), MIN(TIMESTAMP), DAY) >= 31),
reviews AS (
SELECT
*,
COUNTIF(isRetry IS TRUE) OVER (PARTITION BY userID, cardID ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING) AS retries,
RANK() OVER (PARTITION BY userID, cardID ORDER BY timestamp ASC) AS reviewNumber,
COUNTIF(reviewMarking = "forgotten" AND sessionID IS NULL) OVER (PARTITION BY userID, cardID ORDER BY timestamp ASC) AS essayLapses,
MIN(timestamp) OVER (PARTITION BY userID, cardID ORDER BY timestamp ASC) AS firstTimestamp,
TIMESTAMP_DIFF(LAG(nextDueTimestamp) OVER (PARTITION BY userID, cardID ORDER BY timestamp ASC), LAG(timestamp) OVER(PARTITION BY userID, cardID ORDER BY timestamp ASC), DAY) AS delta
FROM
`logs.reviews`),
samples AS (
SELECT
*
FROM
reviews
WHERE
retries > 0
AND essayLapses > 0
AND reviewNumber = essayLapses + 2
AND sessionID IS NOT NULL
AND isRetry IS NOT TRUE
AND firstTimestamp >= TIMESTAMP("2020-12-12"))
SELECT
COUNT(*) AS N,
COUNTIF(reviewMarking="remembered")/COUNT(*) AS accuracy,
delta
FROM
samples
GROUP BY
delta
ORDER BY delta