20220127162236

WITH
  users AS (
  SELECT
    * EXCEPT (schedule), SUBSTR(schedule, 0, 9) AS schedule
  FROM
    `logs.registeredUsers` WHERE timestamp < TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL 120 DAY) AND timestamp >= TIMESTAMP("2021-04-19")),
  withRank AS (
  SELECT
    *,
    COUNTIF(sessionID IS NOT NULL AND isRetry IS NOT TRUE) OVER (PARTITION BY userID, cardID ORDER BY timestamp) AS repetitionNumber,
    COUNTIF(reviewMarking = "remembered" AND isRetry IS NOT TRUE) OVER (PARTITION BY userID, cardID ORDER BY timestamp ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING) AS correctCount,
  FROM
    `logs.reviews`
  WHERE
    cardID IN (
    SELECT
      cardID
    FROM
      `logs.latestEssaysCards`
    WHERE
      essayName="qcvc")),
  withAccuracies AS (
  SELECT
    userID,
    ANY_VALUE(schedule) AS schedule,
    COUNTIF(reviewMarking="remembered") / COUNT(*) AS accuracy,
    COUNT(DISTINCT sessionID) AS sessionCount
  FROM
    withRank
  JOIN
    users
  USING
    (userID)
  WHERE
    correctCount = 1 AND sessionID IS NOT NULL AND isRetry IS NOT TRUE
  GROUP BY
    userID
  HAVING
    COUNT(DISTINCT cardID) >= 103)
       
SELECT
  schedule,
  (
  SELECT
    COUNT(*)
  FROM
    users AS r
  WHERE
    r.schedule = a.schedule
  GROUP BY
    r.schedule) AS totalRegistered,

    (SELECT COUNT(*) FROM (SELECT userID, ANY_VALUE(schedule) AS schedule FROM withRank JOIN users USING (userID) WHERE reviewCount = 1 GROUP BY userID HAVING COUNT(*) >= 103) AS r WHERE r.schedule = a.schedule GROUP BY schedule) AS totalCollected,
  COUNT(*) AS N,
  APPROX_QUANTILES(accuracy, 4) AS quantiles
FROM
  withAccuracies AS a
GROUP BY
  schedule
ORDER BY
  schedule
Last updated 2023-07-13.