CREATE MODEL
`temp.questionModel`
OPTIONS
( MODEL_TYPE='KMEANS',
NUM_CLUSTERS=4) AS
WITH
cards AS (SELECT cardID, question, answer, essayOrder FROM `logs.latestEssaysCards` WHERE essayName="qcvc"),
samples AS (SELECT * FROM `logs.reviews` JOIN cards USING (cardID) WHERE reviewCount=1),
users AS (SELECT DISTINCT userID FROM samples GROUP BY userID HAVING COUNT(*) = 112),
withAllAnswered AS (SELECT * FROM samples JOIN users USING (userID))
SELECT ARRAY_AGG(userID) AS responses FROM withAllAnswered WHERE reviewMarking="remembered" GROUP BY cardID
And to evaluate:
WITH
cards AS (SELECT cardID, question, answer, essayOrder FROM `logs.latestEssaysCards` WHERE essayName="qcvc"),
samples AS (SELECT * FROM `logs.reviews` JOIN cards USING (cardID) WHERE reviewCount=1),
users AS (SELECT DISTINCT userID FROM samples GROUP BY userID HAVING COUNT(*) = 112),
withAllAnswered AS (SELECT * FROM samples JOIN users USING (userID)),
inputs AS (SELECT cardID, ARRAY_AGG(userID) AS responses FROM withAllAnswered WHERE reviewMarking="remembered" GROUP BY cardID)
SELECT * EXCEPT (responses) FROM ML.PREDICT(MODEL `metabook-qcc.temp.questionModel`, TABLE inputs) JOIN cards USING (cardID) ORDER BY CENTROID_ID