20211126124426

WITH
  users AS (
  SELECT
    * EXCEPT (timestamp), timestamp as registrationTimestamp
  FROM
    `logs.registeredUsers`),
  initialReviews AS (
  SELECT
    DISTINCT userID,
    registrationTimestamp,
    schedule,
    cardID
  FROM
    (SELECT * EXCEPT (reviewCount), RANK() OVER (PARTITION BY userID, cardID ORDER BY timestamp) AS reviewCount FROM `logs.reviews`)
  JOIN
    `users`
  USING
    (userID)
  WHERE
    sessionID IS NULL AND reviewCount = 1 AND
    cardID IN(
    SELECT
      cardID
    FROM
      `logs.latestEssaysCards`
    WHERE
      essayName="qcvc")
   ),
   achievers AS (SELECT DISTINCT userID, MIN(beforeInterval) / (1000*60*60*24) AS firstInterval FROM `logs.reviews` JOIN initialReviews USING (userID, cardID) WHERE sessionID IS NOT NULL AND isRetry IS NOT TRUE AND beforeInterval > 1000*60*60*24 AND reviewMarking="remembered" GROUP BY userID HAVING COUNT(DISTINCT cardID) > 50),
  subsequentReviews AS (
  SELECT
    *,
    TIMESTAMP_DIFF(MAX(timestamp) OVER (PARTITION BY userID, cardID ORDER BY timestamp ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING), MIN(timestamp) OVER (PARTITION BY userID, cardID ORDER BY timestamp ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING), DAY) AS span,
    RANK() OVER (PARTITION BY userID, cardID ORDER BY timestamp) AS realReviewCount,
  FROM
    `logs.reviews`
  JOIN
    initialReviews
  USING
    (userID,
      cardID)
  WHERE
    sessionID IS NOT NULL AND isRetry IS NOT TRUE),
  samples AS (
    SELECT *, RANK() OVER (PARTITION BY cardID, userID ORDER BY timestamp) AS delayedRecallCount FROM subsequentReviews WHERE reviewMarking="remembered" AND beforeInterval > 1000*60*60*24
  ),
  byUser AS (
    SELECT userID, ANY_VALUE(firstInterval) AS firstInterval, SUM(realReviewCount) / COUNT(DISTINCT cardID) AS averageReviewCount FROM samples JOIN achievers USING (userID) WHERE delayedRecallCount = 1 GROUP BY userID
  )

SELECT 
firstInterval AS days,
APPROX_QUANTILES(averageReviewCount, 10),
  COUNT(DISTINCT userID) AS N,
  COUNT(*) AS promptN,
FROM
byUser
GROUP BY
days
ORDER BY
days
Last updated 2023-07-13.