20211126122806

WITH
  users AS (
  SELECT
    * EXCEPT (timestamp), timestamp as registrationTimestamp
  FROM
    `logs.registeredUsers`),
  initialReviews AS (
  SELECT
    DISTINCT userID,
    registrationTimestamp,
    schedule,
    cardID
  FROM
    (SELECT * EXCEPT (reviewCount), RANK() OVER (PARTITION BY userID, cardID ORDER BY timestamp) AS reviewCount FROM `logs.reviews`)
  JOIN
    `users`
  USING
    (userID)
  WHERE
    sessionID IS NULL AND reviewCount = 1 AND
    cardID IN(
    SELECT
      cardID
    FROM
      `logs.latestEssaysCards`
    WHERE
      essayName="qcvc")
   ),
  subsequentReviews AS (
  SELECT
    *,
    TIMESTAMP_DIFF(MAX(timestamp) OVER (PARTITION BY userID, cardID ORDER BY timestamp ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING), MIN(timestamp) OVER (PARTITION BY userID, cardID ORDER BY timestamp ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING), DAY) AS span,
    RANK() OVER (PARTITION BY userID, cardID ORDER BY timestamp) AS realReviewCount,
  FROM
    `logs.reviews`
  JOIN
    initialReviews
  USING
    (userID,
      cardID)
  WHERE
    sessionID IS NOT NULL AND isRetry IS NOT TRUE),
  samples AS (
    SELECT *, RANK() OVER (PARTITION BY cardID, userID ORDER BY timestamp) AS delayedRecallCount FROM subsequentReviews WHERE reviewMarking="remembered" AND beforeInterval > 1000*60*60*24
  )

SELECT 
CEIL(beforeInterval / (1000*60*60*24)) AS days,
APPROX_QUANTILES(realReviewCount - 1, 10),
  COUNT(DISTINCT userID) AS N,
  COUNT(*) AS promptN,
FROM
    samples WHERE delayedRecallCount = 1
GROUP BY
days
ORDER BY
days
Last updated 2023-07-13.