WITH
eligible AS (SELECT userID FROM `logs.registeredUsers` WHERE schedule="aggressiveStart"),
reviews AS (SELECT *, RANK() OVER (PARTITION BY cardID, userID ORDER BY timestamp) - 1 AS repetitionNumber, TIMESTAMP_DIFF(timestamp, LAG(timestamp) OVER (PARTITION BY userID, cardID ORDER BY timestamp), DAY) AS delay FROM `logs.reviews` WHERE isRetry IS NOT TRUE AND cardID IN (SELECT cardID FROM `logs.latestEssaysCards` WHERE essayName="qcvc")),
inEssayRecallRates AS (SELECT userID, COUNT(*) AS cardCount, COUNTIF(reviewMarking="remembered")/COUNT(*) AS essayRecallRate FROM reviews WHERE repetitionNumber = 0 GROUP BY userID HAVING cardCount >= 50),
completionists AS (SELECT userID FROM reviews JOIN inEssayRecallRates USING (userID) JOIN eligible USING (userID) WHERE repetitionNumber = 4 GROUP BY userID HAVING COUNT(*) >= ANY_VALUE(cardCount) * 0.95),
users AS (SELECT DISTINCT userID FROM inEssayRecallRates JOIN completionists USING (userID) WHERE essayRecallRate >= 0.831 AND essayRecallRate <= 0.957),
stats AS (SELECT repetitionNumber, cardID, SAFE_DIVIDE(COUNTIF(delay>=28), COUNT(*)) AS eligibleFraction, SAFE_DIVIDE(COUNTIF(delay>=28 AND reviewMarking="remembered"), COUNTIF(delay>=28)) AS successfulFraction, COUNTIF(delay>=28 AND reviewMarking="remembered")/COUNT(*) AS jointFraction, COUNT(*) AS N FROM reviews JOIN users USING (userID) GROUP BY repetitionNumber, cardID)
SELECT repetitionNumber, APPROX_QUANTILES(eligibleFraction, 10), APPROX_QUANTILES(successfulFraction, 10), APPROX_QUANTILES(jointFraction, 10), APPROX_QUANTILES(N, 10) FROM stats GROUP BY repetitionNumber ORDER BY repetitionNumber