WITH conditions AS (SELECT userID, JSON_EXTRACT_SCALAR(extra, "$['condition']") AS condition FROM `metabook-qcc.logs.userEvents` WHERE timestamp >= TIMESTAMP("2020-01-28") AND JSON_EXTRACT_SCALAR(extra, "$['experimentName']") = "efficacy2" ORDER BY condition),
enrolledUsers AS (SELECT userID, condition, timestamp as enrollmentTimestamp FROM (SELECT * FROM `metabook-qcc.logs.userEvents` WHERE eventName="efficacy2Enroll") JOIN conditions USING (userID)),
cards AS (SELECT * FROM UNNEST(([ "h1AXHXVtsGKxkamS8Hb2",
"6NRxe1wnphNsMjTZ9LXf",
"xQte6vwqUwy2j5OWdxjK",
"1ENYKL02RT8aidhI917F",
"zRgw2osbesLEQ5ADo8DD",
"E4g8wyr2folQ87Slf3ZO",
"7QM6pEJ9bxZHK50e1kww",
"xiNW1zgeb2ITHGi6uQtg",
"s8duZcGBbu0dxb4xEAGg"
])) AS cardID),
relevantReviews AS (SELECT userID, cardID, reviewMarking, timestamp, sessionID FROM `metabook-qcc.logs.reviews` WHERE timestamp >= TIMESTAMP("2020-01-28") AND userID IN (SELECT userID FROM enrolledUsers) AND cardID IN (SELECT cardID FROM cards)),
samples AS (SELECT userID, cardID, reviewMarking, COUNT(*) OVER (PARTITION BY userID) AS reviewCount FROM (SELECT *, RANK() OVER (PARTITION BY userID, cardID ORDER BY timestamp ASC) as reviewNumber FROM relevantReviews WHERE sessionID IS NOT NULL AND timestamp >= TIMESTAMP_ADD((SELECT enrollmentTimestamp FROM enrolledUsers WHERE enrolledUsers.userID = relevantReviews.userID), INTERVAL 30 DAY)) WHERE reviewNumber = 1),
firstReviewScores AS (SELECT userID, cardID, reviewMarking, COUNT(*) OVER (PARTITION BY userID) AS reviewCount FROM (SELECT *, RANK() OVER (PARTITION BY userID, cardID ORDER BY timestamp ASC) as reviewNumber FROM relevantReviews WHERE sessionID IS NOT NULL) WHERE reviewNumber = 1),
results AS (SELECT condition, cardID, COUNTIF(reviewMarking="remembered")/COUNT(*) AS userFraction, ANY_VALUE(question) AS question, COUNT(*) AS N FROM samples JOIN enrolledUsers USING (userID) JOIN `logs.latestEssaysCards` USING (cardID) GROUP BY condition, cardID),
eligibleUsers AS (SELECT userID FROM samples JOIN enrolledUsers USING (userID) WHERE condition = "delay1Month" GROUP BY userID HAVING COUNT(*) = 9),
userSamples AS (SELECT COUNTIF(reviewMarking="remembered") AS correctCount FROM samples JOIN eligibleUsers USING (userID) GROUP BY userID)
SELECT APPROX_QUANTILES(correctCount, 4), COUNT(*) AS N FROM userSamples
-- SELECT cardID, firstReviewFraction, noReviewFraction, log(noReviewFraction / firstReviewFraction) AS logLikelihoodRatio, firstReviewN, noReviewN, question FROM (SELECT userFraction AS firstReviewFraction, N AS firstReviewN, cardID FROM firstReviewResults) JOIN (SELECT userFraction AS noReviewFraction, cardID, question, N AS noReviewN FROM results WHERE condition="delay1Month") USING (cardID) ORDER BY noReviewFraction ASC