WITH
successes AS (SELECT r.*, MIN(r.timestamp) OVER (PARTITION BY cardID, userID ORDER BY r.timestamp ASC) AS firstTimestamp FROM `logs.reviews` AS r JOIN `logs.latestEssaysCards` USING (cardID) WHERE essayName="qcvc" AND isRetry IS NOT TRUE AND reviewMarking = "remembered"),
withBestInterval AS (SELECT ANY_VALUE(r.sessionID) AS sessionID, r.userID AS userID, r.cardID AS cardID, r.timestamp AS timestamp, ANY_VALUE(r.beforeInterval) AS beforeInterval, MAX(r.nextDueTimestamp) OVER (PARTITION BY r.cardID, r.userID) AS maxTimestamp, ANY_VALUE(r.reviewMarking) AS reviewMarking, MAX(s.beforeInterval) AS priorBestInterval, ANY_VALUE(s.firstTimestamp) AS firstTimestamp FROM `logs.reviews` AS r JOIN successes AS s ON (r.cardID = s.cardID AND r.userID = s.userID AND r.timestamp > s.timestamp) WHERE r.isRetry IS NOT TRUE GROUP BY r.userID, r.cardID, r.timestamp, r.nextDueTimestamp),
withTimeTaken AS (SELECT *, TIMESTAMP_DIFF(timestamp, LAG(timestamp) OVER (PARTITION BY userID ORDER BY timestamp), MILLISECOND) / 1000 AS dt FROM withBestInterval WHERE priorBestInterval > 0),
traces AS (SELECT cardID, userID, COUNT(*) AS reviewCount, SUM(IF(dt > 60, 0, dt)) AS totalTimeTaken FROM withTimeTaken WHERE TIMESTAMP_DIFF(timestamp, firstTimestamp, DAY) < 365 AND sessionID IS NOT NULL GROUP BY cardID, userID HAVING (TIMESTAMP_DIFF(ANY_VALUE(maxTimestamp), ANY_VALUE(firstTimestamp), DAY) >= 365)),
perUser AS (SELECT userID, COUNT(DISTINCT cardID) AS cardCount, SUM(reviewCount) AS totalReviews, SUM(totalTimeTaken) AS totalTimeTaken FROM traces JOIN (SELECT * FROM `logs.registeredUsers` WHERE schedule="aggressiveStart") USING (userID) GROUP BY userID HAVING COUNT(*) >= 112)
SELECT APPROX_QUANTILES(totalReviews, 4), APPROX_QUANTILES(totalTimeTaken, 4), COUNT(*) FROM perUser