WITH
users AS (SELECT * FROM `logs.registeredUsers` WHERE schedule NOT IN UNNEST(["original", "aggressiveStart"])),
essayReviews AS (SELECT DISTINCT userID, cardID, reviewMarking, (COUNT(*) OVER (PARTITION BY userID ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)) AS cardCount FROM `logs.reviews` JOIN `users` USING (userID) WHERE reviewCount = 1 AND cardID IN (
SELECT
cardID
FROM
`logs.latestEssaysCards`
WHERE
essayName="qcvc")),
inEssay AS (SELECT "essay" AS condition, COUNTIF(reviewMarking="remembered")/COUNT(*) AS accuracy FROM essayReviews WHERE cardCount >= 50 GROUP BY userID),
subsequentReviews AS (SELECT * EXCEPT (reviewMarking), essayReviews.reviewMarking AS essayMarking,r.reviewMarking AS repetitionMarking, RANK() OVER (PARTITION BY userID, cardID ORDER BY timestamp) AS fullReviewCount FROM `logs.reviews` AS r JOIN essayReviews USING (userID, cardID) WHERE sessionID IS NOT NULL AND isRetry IS FALSE AND beforeInterval >= 1000*60*60*24*2),
samples AS (SELECT MAX(beforeInterval) / (1000*60*60*24) AS days, COUNTIF(repetitionMarking="remembered" AND essayMarking="remembered") / COUNTIF(essayMarking="remembered") AS accuracy FROM subsequentReviews WHERE fullReviewCount = 1 AND cardCount >= 50 GROUP BY userID HAVING COUNT(*) >= ANY_VALUE(cardCount) * 0.95)
SELECT CAST(days AS STRING) AS condition, APPROX_QUANTILES(accuracy, 4), COUNT(*) FROM samples WHERE days in (7,14,30,60) GROUP BY condition