WITH
users AS (SELECT userID, schedule FROM `logs.registeredUsers` WHERE schedule != "original"),
forgottenInEssay AS (SELECT userID, cardID FROM `logs.reviews` WHERE cardID IN (SELECT cardID FROM `logs.latestEssaysCards` WHERE essayName="qcvc") AND reviewCount = 1 AND reviewMarking = "forgotten" AND sessionID IS NULL),
sessionReviews AS (SELECT userID, cardID, reviewMarking, beforeInterval, isRetry, TIMESTAMP_DIFF(timestamp, LAG(timestamp) OVER (PARTITION BY userID, cardID ORDER BY timestamp), HOUR) AS delay, TIMESTAMP_DIFF(LAG(nextDueTimestamp) OVER (PARTITION BY userID, cardID ORDER BY timestamp), LAG(timestamp) OVER (PARTITION BY userID, cardID ORDER BY timestamp), HOUR) AS scheduledDelay, COUNTIF(sessionID IS NOT NULL) OVER (PARTITION BY userID, cardID ORDER BY timestamp) AS sessionNumber, COUNTIF(isRetry IS TRUE) OVER (PARTITION BY userID, cardID ORDER BY timestamp) AS previousRetries, COUNTIF(sessionID IS NULL AND isRetry IS NOT TRUE) OVER (PARTITION BY userID, cardID ORDER BY timestamp) AS essayReviews, COUNTIF(sessionID IS NOT NULL AND isRetry IS NOT TRUE AND reviewMarking="remembered") OVER (PARTITION BY userID, cardID ORDER BY timestamp ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING) AS sessionsRemembered FROM `logs.reviews` JOIN forgottenInEssay USING (userID, cardID)),
firstSessionRemembered AS (SELECT userID, cardID, MIN(sessionNumber) AS sessionNumber FROM sessionReviews WHERE sessionNumber > 0 AND isRetry IS NOT TRUE AND previousRetries > 0 AND scheduledDelay = beforeInterval/(1000*60*60) AND sessionsRemembered = 0 AND reviewMarking="remembered" GROUP BY userID, cardID),
samples AS (SELECT s.* FROM sessionReviews AS s JOIN firstSessionRemembered AS f ON (s.userID=f.userID AND s.cardID=f.cardID AND s.sessionNumber=f.sessionNumber+1 AND s.scheduledDelay = s.beforeInterval/(1000*60*60) AND essayReviews=1))
-- N.B. Using a hack here to separate aggressiveStart into a different group by adding 1ms to its interval.
SELECT IF(schedule="aggressiveStart", beforeInterval + 1, beforeInterval) AS beforeInterval, COUNTIF(reviewMarking="remembered") / COUNT(*) AS accuracy, COUNT(*) AS N, COUNT(DISTINCT userID) AS userCount, ROUND(delay/24) as dayBucket FROM samples JOIN users USING (userID) GROUP BY beforeInterval, dayBucket HAVING COUNT(DISTINCT userID) > 10 ORDER BY beforeInterval, dayBucket
Query