WITH
-- Users whose logged reviews span at least 31 days, measured with
-- TIMESTAMP_DIFF(..., DAY) between their earliest and latest review timestamps.
eligibleUsers AS (
  SELECT
    userID
  FROM (
    -- One row per user carrying the first and last review timestamps.
    SELECT
      userID,
      MIN(timestamp) AS firstReviewAt,
      MAX(timestamp) AS lastReviewAt
    FROM
      `logs.reviews`
    GROUP BY
      userID)
  WHERE
    TIMESTAMP_DIFF(lastReviewAt, firstReviewAt, DAY) >= 31),
-- Every review row belonging to an eligible user, annotated with per-(user, card)
-- history computed over the rows ordered by timestamp:
--   reviewNumber   running count of rows where isRetry is not TRUE, through the current row
--   lapses         number of strictly earlier rows marked "forgotten"
--   firstTimestamp earliest timestamp logged for this user/card pair
--   delta          days from the previous row's timestamp to the previous row's
--                  nextDueTimestamp (presumably the interval scheduled at the
--                  prior review -- confirm against the scheduler)
reviews AS (
  SELECT
    *,
    COUNTIF(isRetry IS NOT TRUE) OVER cardHistory AS reviewNumber,
    COUNTIF(reviewMarking = "forgotten") OVER (
      cardHistory
      ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING) AS lapses,
    FIRST_VALUE(timestamp) OVER cardHistory AS firstTimestamp,
    TIMESTAMP_DIFF(
      LAG(nextDueTimestamp) OVER cardHistory,
      LAG(timestamp) OVER cardHistory,
      DAY) AS delta
  FROM
    `logs.reviews`
  WHERE
    userID IN (SELECT userID FROM eligibleUsers)
  WINDOW
    -- Shared ordering: each card's reviews for a given user, oldest first.
    cardHistory AS (PARTITION BY userID, cardID ORDER BY timestamp ASC)),
-- Rows kept for the final aggregate: the second non-retry review of a card
-- that has at least one earlier "forgotten" marking, restricted to cards whose
-- first logged review is on or after 2020-12-12.
samples AS (
  SELECT
    *
  FROM
    reviews
  WHERE
    isRetry IS NOT TRUE
    AND reviewNumber = 2
    AND lapses >= 1
    AND firstTimestamp >= TIMESTAMP("2020-12-12"))
-- For each value of delta (in days): the number of sampled reviews and the
-- fraction of them marked "remembered".
SELECT
  COUNT(*) AS N,
  COUNTIF(reviewMarking = "remembered") / COUNT(*) AS accuracy,
  delta
FROM
  samples
GROUP BY
  delta
ORDER BY
  delta ASC