20210413104653

WITH conditions AS (SELECT userID, JSON_EXTRACT_SCALAR(extra, "$['condition']") AS condition FROM `metabook-qcc.logs.userEvents` WHERE timestamp >= TIMESTAMP("2020-01-28") AND JSON_EXTRACT_SCALAR(extra, "$['experimentName']") = "efficacy2" ORDER BY condition),

enrolledUsers AS (SELECT userID, condition, timestamp as enrollmentTimestamp FROM (SELECT * FROM `metabook-qcc.logs.userEvents` WHERE eventName="efficacy2Enroll") JOIN conditions USING (userID)),

cards AS (SELECT * FROM UNNEST(([  "h1AXHXVtsGKxkamS8Hb2",
  "6NRxe1wnphNsMjTZ9LXf",
  "xQte6vwqUwy2j5OWdxjK",
  "1ENYKL02RT8aidhI917F",
  "zRgw2osbesLEQ5ADo8DD",
  "E4g8wyr2folQ87Slf3ZO",
  "7QM6pEJ9bxZHK50e1kww",
  "xiNW1zgeb2ITHGi6uQtg",
  "s8duZcGBbu0dxb4xEAGg"
  ])) AS cardID),
  
relevantReviews AS (SELECT userID, cardID, reviewMarking, timestamp, sessionID, RANK() OVER (PARTITION BY userID, cardID ORDER BY timestamp ASC) as reviewNumber FROM `metabook-qcc.logs.reviews` WHERE timestamp >= TIMESTAMP("2020-01-28") AND userID IN (SELECT userID FROM enrolledUsers) AND cardID IN (SELECT cardID FROM cards) AND sessionID IS NOT NULL),

eligibleUsers AS (SELECT userID FROM enrolledUsers JOIN (SELECT userID, COUNT(*) AS reviewCount FROM relevantReviews WHERE reviewNumber = 3 GROUP BY userID HAVING reviewCount = 9) USING (userID) WHERE condition = "delay5Days" GROUP BY userID),

userSamples AS (SELECT reviewNumber, COUNTIF(reviewMarking="remembered") AS correctCount FROM relevantReviews JOIN eligibleUsers USING (userID) GROUP BY userID, reviewNumber)

SELECT reviewNumber, APPROX_QUANTILES(correctCount, 4), AVG(correctCount), COUNT(*) AS N FROM userSamples WHERE reviewNumber <= 3 GROUP BY reviewNumber ORDER BY reviewNumber

-- SELECT reviewNumber, correctCount, COUNT(*) FROM userSamples WHERE reviewNumber <= 3 GROUP BY correctCount, reviewNumber ORDER BY reviewNumber, correctCount