WITH
users AS (
SELECT
* EXCEPT (schedule), SUBSTR(schedule, 0, 9) AS schedule
FROM
`logs.registeredUsers` WHERE timestamp < TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL 120 DAY) AND timestamp >= TIMESTAMP("2021-04-19")),
withRank AS (
SELECT
*,
COUNTIF(sessionID IS NOT NULL AND isRetry IS NOT TRUE) OVER (PARTITION BY userID, cardID ORDER BY timestamp) AS repetitionNumber,
COUNTIF(reviewMarking = "remembered" AND isRetry IS NOT TRUE) OVER (PARTITION BY userID, cardID ORDER BY timestamp ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING) AS correctCount,
FROM
`logs.reviews`
WHERE
cardID IN (
SELECT
cardID
FROM
`logs.latestEssaysCards`
WHERE
essayName="qcvc")),
withAccuracies AS (
SELECT
userID,
ANY_VALUE(schedule) AS schedule,
COUNTIF(reviewMarking="remembered") / COUNT(*) AS accuracy,
COUNT(DISTINCT sessionID) AS sessionCount
FROM
withRank
JOIN
users
USING
(userID)
WHERE
correctCount = 1 AND sessionID IS NOT NULL AND isRetry IS NOT TRUE
GROUP BY
userID
HAVING
COUNT(DISTINCT cardID) >= 103)
SELECT
schedule,
(
SELECT
COUNT(*)
FROM
users AS r
WHERE
r.schedule = a.schedule
GROUP BY
r.schedule) AS totalRegistered,
(SELECT COUNT(*) FROM (SELECT userID, ANY_VALUE(schedule) AS schedule FROM withRank JOIN users USING (userID) WHERE reviewCount = 1 GROUP BY userID HAVING COUNT(*) >= 103) AS r WHERE r.schedule = a.schedule GROUP BY schedule) AS totalCollected,
COUNT(*) AS N,
APPROX_QUANTILES(accuracy, 4) AS quantiles
FROM
withAccuracies AS a
GROUP BY
schedule
ORDER BY
schedule