20220201083843

WITH
users AS (SELECT * FROM `logs.registeredUsers` WHERE schedule NOT IN UNNEST(["original", "aggressiveStart"])),

essayReviews AS (SELECT DISTINCT userID, cardID, reviewMarking, (COUNT(*) OVER (PARTITION BY userID ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)) AS cardCount FROM `logs.reviews` JOIN `users` USING (userID) WHERE reviewCount = 1 AND cardID IN (
    SELECT
      cardID
    FROM
      `logs.latestEssaysCards`
    WHERE
      essayName="qcvc")),
      
inEssay AS (SELECT "essay" AS condition, COUNTIF(reviewMarking="remembered")/COUNT(*) AS accuracy FROM essayReviews WHERE cardCount >= 50 GROUP BY userID),

subsequentReviews AS (SELECT * EXCEPT (reviewMarking), essayReviews.reviewMarking AS essayMarking,r.reviewMarking AS repetitionMarking, RANK() OVER (PARTITION BY userID, cardID ORDER BY timestamp) AS fullReviewCount FROM `logs.reviews` AS r JOIN essayReviews USING (userID, cardID) WHERE sessionID IS NOT NULL AND isRetry IS FALSE AND beforeInterval >= 1000*60*60*24*2),

samples AS (SELECT MAX(beforeInterval) / (1000*60*60*24) AS days, COUNTIF(repetitionMarking="remembered" AND essayMarking="remembered") / COUNTIF(essayMarking="remembered") AS accuracy FROM subsequentReviews WHERE fullReviewCount = 1 AND cardCount >= 50 GROUP BY userID HAVING COUNT(*) >= ANY_VALUE(cardCount) * 0.95)

SELECT CAST(days AS STRING) AS condition, APPROX_QUANTILES(accuracy, 4), COUNT(*) FROM samples WHERE days in (7,14,30,60) GROUP BY condition
Last updated 2023-07-13.