20210302162337

WITH conditions AS (SELECT userID, JSON_EXTRACT_SCALAR(extra, "$['condition']") AS condition FROM `metabook-qcc.logs.userEvents` WHERE timestamp >= TIMESTAMP("2020-01-28") AND JSON_EXTRACT_SCALAR(extra, "$['experimentName']") = "efficacy2" ORDER BY condition),

enrolledUsers AS (SELECT userID, condition, timestamp as enrollmentTimestamp FROM (SELECT * FROM `metabook-qcc.logs.userEvents` WHERE eventName="efficacy2Enroll") JOIN conditions USING (userID)),

experimentalCards AS (SELECT * FROM UNNEST(([  "h1AXHXVtsGKxkamS8Hb2",
  "6NRxe1wnphNsMjTZ9LXf",
  "xQte6vwqUwy2j5OWdxjK",
  "1ENYKL02RT8aidhI917F",
  "zRgw2osbesLEQ5ADo8DD",
  "E4g8wyr2folQ87Slf3ZO",
  "7QM6pEJ9bxZHK50e1kww",
  "xiNW1zgeb2ITHGi6uQtg",
  "s8duZcGBbu0dxb4xEAGg"
  ])) AS cardID),
  
cards AS (SELECT cardID, cardID IN (SELECT cardID FROM experimentalCards) AS isExperimental FROM `logs.latestEssaysCards` WHERE essayName = "qcvc"),
  
relevantReviews AS (SELECT userID, cardID, reviewMarking, timestamp, sessionID, isExperimental FROM `metabook-qcc.logs.reviews` JOIN cards USING (cardID) WHERE timestamp >= TIMESTAMP("2020-01-28") AND userID IN (SELECT userID FROM enrolledUsers)),

initialReviewScores AS (SELECT userID, SAFE_DIVIDE(COUNTIF(reviewMarking="remembered" AND isExperimental IS TRUE), COUNTIF(isExperimental IS TRUE)) AS initialExperimentalAccuracy, SAFE_DIVIDE(COUNTIF(reviewMarking="remembered" AND isExperimental IS FALSE), COUNTIF(isExperimental IS FALSE)) AS initialControlAccuracy FROM (SELECT *, RANK() OVER (PARTITION BY userID, cardID ORDER BY timestamp ASC) as reviewNumber FROM relevantReviews) WHERE reviewNumber = 1 AND sessionID IS NULL GROUP BY userID),

oneMonthReviewScores AS (SELECT userID, MAX(timestamp) AS oneMonthLastReviewTimestamp, SAFE_DIVIDE(COUNTIF(reviewMarking="remembered" AND isExperimental IS TRUE), COUNTIF(isExperimental IS TRUE)) AS oneMonthExperimentalAccuracy, COUNTIF(isExperimental IS TRUE) AS oneMonthExperimentalTotalCount, SAFE_DIVIDE(COUNTIF(reviewMarking="remembered" AND isExperimental IS FALSE), COUNTIF(isExperimental IS FALSE)) AS oneMonthControlAccuracy, COUNTIF(isExperimental IS FALSE) AS oneMonthControlTotalCount FROM (SELECT *, RANK() OVER (PARTITION BY userID, cardID ORDER BY timestamp ASC) as reviewNumber FROM relevantReviews WHERE sessionID IS NOT NULL AND timestamp >= TIMESTAMP_ADD((SELECT enrollmentTimestamp FROM enrolledUsers WHERE enrolledUsers.userID = relevantReviews.userID), INTERVAL 30 DAY)) WHERE reviewNumber = 1 GROUP BY userID),

userSamples AS (SELECT * FROM enrolledUsers LEFT JOIN initialReviewScores USING (userID) LEFT JOIN oneMonthReviewScores USING (userID) WHERE oneMonthExperimentalTotalCount = 9 AND condition != "delay5Days" ORDER BY condition, oneMonthExperimentalAccuracy DESC)

SELECT condition, CASE WHEN initialControlAccuracy > 0.95 THEN 0.95 WHEN initialControlAccuracy > 0.9 THEN 0.9 WHEN initialControlAccuracy > 0.85 THEN 0.85 WHEN initialControlAccuracy > 0.8 THEN 0.8 END AS bucket, COUNT(*) AS N, AVG(oneMonthControlAccuracy) as control, AVG(oneMonthExperimentalAccuracy) as experimental FROM userSamples GROUP BY condition, bucket ORDER BY condition, bucket
Last updated 2023-07-13.