WITH
eligibleUsers AS (SELECT userID FROM `logs.reviews` GROUP BY userID HAVING TIMESTAMP_DIFF(MAX(timestamp), MIN(TIMESTAMP), DAY) >= 31),
reviews AS (
SELECT
*,
RANK() OVER (PARTITION BY userID, cardID ORDER BY timestamp ASC) AS reviewNumber,
LAG(reviewMarking) OVER (PARTITION BY userID, cardID ORDER BY timestamp ASC) AS lastMarking,
LAG(timestamp) OVER (PARTITION BY userID, cardID ORDER BY timestamp ASC) AS lastTimestamp,
TIMESTAMP_DIFF(LAG(nextDueTimestamp) OVER (PARTITION BY userID, cardID ORDER BY timestamp ASC), LAG(timestamp) OVER(PARTITION BY userID, cardID ORDER BY timestamp ASC), DAY) AS delta
FROM
`logs.reviews` WHERE userID IN (SELECT userID FROM eligibleUsers) AND isRetry IS NOT TRUE),
samples AS (
SELECT
*
FROM
reviews
WHERE
reviewNumber = 2
AND lastMarking="forgotten"
AND lastTimestamp >= TIMESTAMP("2020-12-12"))
SELECT
COUNT(*) AS N,
COUNTIF(reviewMarking="remembered")/COUNT(*) AS accuracy,
delta
FROM
samples
GROUP BY
delta
ORDER BY delta