WITH
eligibleSamples AS (
SELECT
userID,
cardID
FROM
`logs.reviews`
GROUP BY
userID, cardID
HAVING
TIMESTAMP_DIFF(MAX(timestamp), MIN(TIMESTAMP), DAY) >= 31),
reviews AS (
SELECT
*,
COUNTIF(isRetry IS NOT TRUE) OVER (PARTITION BY userID, cardID ORDER BY timestamp ASC) AS reviewNumber,
COUNTIF(reviewMarking = "forgotten") OVER (PARTITION BY userID, cardID ORDER BY timestamp ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING) AS lapses,
FIRST_VALUE(timestamp) OVER (PARTITION BY userID, cardID ORDER BY timestamp ASC) AS firstTimestamp,
TIMESTAMP_DIFF(LAG(nextDueTimestamp) OVER (PARTITION BY userID, cardID ORDER BY timestamp ASC), LAG(timestamp) OVER(PARTITION BY userID, cardID ORDER BY timestamp ASC), DAY) AS delta
FROM
`logs.reviews`
JOIN eligibleSamples USING (userID,cardID)),
firstSamples AS (
SELECT
*
FROM
reviews
WHERE
lapses = 0
AND reviewNumber = 2
AND firstTimestamp >= TIMESTAMP("2020-12-12")
AND isRetry IS NOT TRUE),
samples AS (SELECT reviews.*, firstSamples.delta AS firstDelta FROM reviews JOIN firstSamples USING (userID, cardID) WHERE reviews.reviewNumber = 3 AND firstSamples.reviewMarking = "remembered")
SELECT
firstDelta,
delta,
COUNT(*) AS N,
COUNTIF(reviewMarking="remembered")/COUNT(*) AS accuracy,
FROM
samples
GROUP BY
firstDelta, delta
ORDER BY
firstDelta, delta