WITH
users AS (
SELECT
* EXCEPT (timestamp), timestamp as registrationTimestamp
FROM
`logs.registeredUsers`),
initialReviews AS (
SELECT
DISTINCT userID,
registrationTimestamp,
schedule,
cardID
FROM
(SELECT * EXCEPT (reviewCount), RANK() OVER (PARTITION BY userID, cardID ORDER BY timestamp) AS reviewCount FROM `logs.reviews`)
JOIN
`users`
USING
(userID)
WHERE
sessionID IS NULL AND reviewCount = 1 AND
cardID IN(
SELECT
cardID
FROM
`logs.latestEssaysCards`
WHERE
essayName="qcvc")
),
subsequentReviews AS (
SELECT
*,
TIMESTAMP_DIFF(MAX(timestamp) OVER (PARTITION BY userID, cardID ORDER BY timestamp ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING), MIN(timestamp) OVER (PARTITION BY userID, cardID ORDER BY timestamp ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING), DAY) AS span,
RANK() OVER (PARTITION BY userID, cardID ORDER BY timestamp) AS realReviewCount,
FROM
`logs.reviews`
JOIN
initialReviews
USING
(userID,
cardID)
WHERE
sessionID IS NOT NULL AND isRetry IS NOT TRUE),
samples AS (
SELECT *, RANK() OVER (PARTITION BY cardID, userID ORDER BY timestamp) AS delayedRecallCount FROM subsequentReviews WHERE reviewMarking="remembered" AND beforeInterval > 1000*60*60*24
)
SELECT
CEIL(beforeInterval / (1000*60*60*24)) AS days,
APPROX_QUANTILES(realReviewCount - 1, 10),
COUNT(DISTINCT userID) AS N,
COUNT(*) AS promptN,
FROM
samples WHERE delayedRecallCount = 1
GROUP BY
days
ORDER BY
days