WITH
users AS (
SELECT
* EXCEPT (timestamp), timestamp as registrationTimestamp
FROM
`logs.registeredUsers` WHERE schedule != "original" AND schedule != "aggressiveStart"),
initialReviews AS (
SELECT
DISTINCT userID,
registrationTimestamp,
schedule,
cardID
FROM
(SELECT * EXCEPT (reviewCount), RANK() OVER (PARTITION BY userID, cardID ORDER BY timestamp) AS reviewCount FROM `logs.reviews`)
JOIN
`users`
USING
(userID)
WHERE
sessionID IS NULL AND reviewCount = 1 AND reviewMarking = "remembered" AND
cardID IN UNNEST(["HpQN6NxQs6Y8kdS6XOVY",
"1ENYKL02RT8aidhI917F",
"DsKjxLEnyp7KzC93t1YX",
"6H3CMpGxVyuLBaJKZGhI",
"xiNW1zgeb2ITHGi6uQtg",
"s4N3L6iTy93DK5F0mh6i",
"s8duZcGBbu0dxb4xEAGg",
"oEceJqT6i3VlX5R0vyjT",
"Sgl5VJI79NB6b22EqTn3",
"z7cOpsVhlVn3HJWB2VET"])
),
subsequentReviews AS (
SELECT
*,
COUNTIF(reviewMarking="remembered") OVER (PARTITION BY userID, cardID ORDER BY timestamp ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING) AS priorCorrectCount,
RANK() OVER (PARTITION BY userID, cardID ORDER BY timestamp) AS realReviewCount
FROM
`logs.reviews`
JOIN
initialReviews
USING
(userID,
cardID)
WHERE
sessionID IS NOT NULL AND isRetry IS NOT TRUE),
firstSession AS (
SELECT userID, cardID, beforeInterval, realReviewCount FROM subsequentReviews WHERE realReviewCount = 1 AND reviewMarking="forgotten"
)
-- SELECT * FROM `logs.reviews` JOIN (SELECT userID, cardID, beforeInterval, realReviewCount FROM firstSession) AS forgottenInSecondSession USING (userID, cardID) WHERE realReviewCount <= 3 ORDER BY userID, cardID, timestamp
SELECT
firstSession.beforeInterval / (1000*60*60*24),
COUNT(DISTINCT userID) AS N,
COUNT(*) AS promptN,
COUNTIF(reviewMarking="remembered") / COUNT(*) AS accuracy
FROM
subsequentReviews JOIN firstSession USING (userID, cardID)
WHERE
subsequentReviews.realReviewCount = firstSession.realReviewCount + 1
GROUP BY
firstSession.beforeInterval
ORDER BY
firstSession.beforeInterval