WITH
users AS (SELECT userID, schedule FROM `logs.registeredUsers`),
forgottenInEssay AS (SELECT userID, cardID FROM `logs.reviews` WHERE cardID IN (SELECT cardID FROM `logs.latestEssaysCards` WHERE essayName="qcvc") AND reviewCount = 1 AND reviewMarking = "forgotten"),
essayAccuracies AS (SELECT userID, COUNTIF(reviewMarking="remembered")/COUNT(*) AS essayAccuracy FROM `logs.reviews` WHERE reviewCount=1 AND sessionID IS NULL GROUP BY userID),
sessionReviews AS (SELECT userID, cardID, reviewMarking, beforeInterval, isRetry, TIMESTAMP_DIFF(timestamp, LAG(timestamp) OVER (PARTITION BY userID, cardID ORDER BY timestamp), HOUR) AS delay, TIMESTAMP_DIFF(LAG(nextDueTimestamp) OVER (PARTITION BY userID, cardID ORDER BY timestamp), LAG(timestamp) OVER (PARTITION BY userID, cardID ORDER BY timestamp), HOUR) AS scheduledDelay, COUNTIF(sessionID IS NOT NULL) OVER (PARTITION BY userID, cardID ORDER BY timestamp) AS sessionNumber, COUNTIF(isRetry IS TRUE) OVER (PARTITION BY userID, cardID ORDER BY timestamp) AS previousRetries FROM `logs.reviews` JOIN forgottenInEssay USING (userID, cardID)),
reviewSamples AS (SELECT userID, cardID, reviewMarking, beforeInterval, delay FROM sessionReviews WHERE sessionNumber = 1 AND isRetry IS NOT TRUE AND previousRetries > 0 AND scheduledDelay = beforeInterval/(1000*60*60)),
userSamples AS (SELECT userID, beforeInterval, ROUND(delay/(24*7))*7 as dayBucket, ANY_VALUE(essayAccuracy) AS essayAccuracy FROM reviewSamples JOIN essayAccuracies USING (userID) JOIN users USING (userID) WHERE (schedule="aggressiveStart" AND beforeInterval=432000000) OR (schedule!="aggressiveStart" AND schedule!="original" AND beforeInterval=86400000) GROUP BY beforeInterval, dayBucket,userID)
SELECT beforeInterval, dayBucket, APPROX_QUANTILES(essayAccuracy, 4) FROM userSamples GROUP BY beforeInterval, dayBucket HAVING COUNT(DISTINCT userID) > 10 ORDER BY beforeInterval, dayBucket