WITH
users AS (
SELECT
* EXCEPT (timestamp), timestamp as registrationTimestamp
FROM
`logs.registeredUsers` WHERE schedule = "original"),
initialReviews AS (
SELECT
DISTINCT userID,
registrationTimestamp,
schedule,
cardID
FROM
(SELECT * EXCEPT (reviewCount), RANK() OVER (PARTITION BY userID, cardID ORDER BY timestamp) AS reviewCount FROM `logs.reviews`)
JOIN
`users`
USING
(userID)
WHERE
sessionID IS NULL AND reviewCount = 1 AND reviewMarking = "forgotten" AND
cardID IN(
SELECT
cardID
FROM
`logs.latestEssaysCards`
WHERE
essayName="qcvc")
),
subsequentReviews AS (
SELECT
*,
COUNTIF(reviewMarking="remembered") OVER (PARTITION BY userID, cardID ORDER BY timestamp ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING) AS priorCorrectCount,
RANK() OVER (PARTITION BY userID, cardID ORDER BY timestamp) AS realReviewCount
FROM
`logs.reviews`
JOIN
initialReviews
USING
(userID,
cardID)
WHERE
sessionID IS NOT NULL AND isRetry IS NOT TRUE),
rememberedInFirstSession AS (
SELECT userID, cardID FROM subsequentReviews WHERE realReviewCount = 1 AND reviewMarking="remembered"
),
forgottenInSecondSession AS (
SELECT userID, cardID, timestamp, beforeInterval, priorCorrectCount, realReviewCount FROM subsequentReviews JOIN rememberedInFirstSession USING (userID, cardID) WHERE realReviewCount = 2 AND reviewMarking="forgotten"
)
SELECT * FROM `logs.reviews` JOIN (SELECT userID, cardID, priorCorrectCount, realReviewCount FROM forgottenInSecondSession) USING (userID, cardID) WHERE priorCorrectCount <= 2 ORDER BY userID, cardID, timestamp
-- SELECT
-- forgottenInSecondSession.beforeInterval / (1000*60*60*24),
-- COUNT(DISTINCT userID) AS N,
-- COUNT(*) AS promptN,
-- COUNTIF(reviewMarking="remembered") / COUNT(*) AS accuracy
-- FROM
-- subsequentReviews JOIN forgottenInSecondSession USING (userID, cardID)
-- WHERE
-- subsequentReviews.realReviewCount = forgottenInSecondSession.realReviewCount + 1
-- GROUP BY
-- forgottenInSecondSession.beforeInterval
-- ORDER BY
-- forgottenInSecondSession.beforeInterval