{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2025:3LTBNC3AN6KEBJJOW64G4RICZG","short_pith_number":"pith:3LTBNC3A","schema_version":"1.0","canonical_sha256":"dae6168b606f9440a52eb7b86e4502c990c174c9ab51204e56afeb19f7ac044c","source":{"kind":"arxiv","id":"2508.05953","version":2},"attestation_state":"computed","paper":{"title":"SCALEFeedback: A Large-Scale Dataset of Synthetic Computer Science Assignments for LLM-generated Educational Feedback Research","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.CY","authors_text":"Dragan Ga\\v{s}evi\\'c, Flora Jin, Guanliang Chen, Kaixun Yang, Keyang Qian, Lixiang Yan, Rui Guan, Sadia Nawaz, Wei Dai, Yixin Cheng, Zachari Swiecki","submitted_at":"2025-08-08T02:37:20Z","abstract_excerpt":"Using Large Language Models (LLMs) to give educational feedback to students for their assignments has attracted much attention in the AI in Education (AIED) field. Yet, there is currently no large-scale open-source dataset of student assignments that includes detailed assignment descriptions, rubrics, and student submissions across various courses. As a result, research on generalisable methodology for automatic generation of effective and responsible educational feedback remains limited. In this paper, we introduce a synthetic computer science university assignment dataset for LLM-based educa"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2508.05953","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CY","submitted_at":"2025-08-08T02:37:20Z","cross_cats_sorted":[],"title_canon_sha256":"6a6e06dd10612c2ee8d7ae0867f367bdaa27009e4160f1c16079fa8ed3c115ea","abstract_canon_sha256":"c3e9edcd7dfab85f42f0289be819e6efba9ffc2bf5eaf3683ed67ac0d9f10c41"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-07-03T00:16:50.232566Z","signature_b64":"JMzsa0Us7mfzgn+eiLq9DPEUtJ20kOPHIbgXJwLvUZjtkL8uA9b0QN+ZyRiEKtvM8MgqfQPYABOR/6QZGVbTCg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"dae6168b606f9440a52eb7b86e4502c990c174c9ab51204e56afeb19f7ac044c","last_reissued_at":"2026-07-03T00:16:50.232064Z","signature_status":"signed_v1","first_computed_at":"2026-07-03T00:16:50.232064Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"SCALEFeedback: A Large-Scale Dataset of Synthetic Computer Science Assignments for LLM-generated Educational Feedback Research","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.CY","authors_text":"Dragan Ga\\v{s}evi\\'c, Flora Jin, Guanliang Chen, Kaixun Yang, Keyang Qian, Lixiang Yan, Rui Guan, Sadia Nawaz, Wei Dai, Yixin Cheng, Zachari Swiecki","submitted_at":"2025-08-08T02:37:20Z","abstract_excerpt":"Using Large Language Models (LLMs) to give educational feedback to students for their assignments has attracted much attention in the AI in Education (AIED) field. Yet, there is currently no large-scale open-source dataset of student assignments that includes detailed assignment descriptions, rubrics, and student submissions across various courses. As a result, research on generalisable methodology for automatic generation of effective and responsible educational feedback remains limited. In this paper, we introduce a synthetic computer science university assignment dataset for LLM-based educa"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2508.05953","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2508.05953/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2508.05953","created_at":"2026-07-03T00:16:50.232122+00:00"},{"alias_kind":"arxiv_version","alias_value":"2508.05953v2","created_at":"2026-07-03T00:16:50.232122+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2508.05953","created_at":"2026-07-03T00:16:50.232122+00:00"},{"alias_kind":"pith_short_12","alias_value":"3LTBNC3AN6KE","created_at":"2026-07-03T00:16:50.232122+00:00"},{"alias_kind":"pith_short_16","alias_value":"3LTBNC3AN6KEBJJO","created_at":"2026-07-03T00:16:50.232122+00:00"},{"alias_kind":"pith_short_8","alias_value":"3LTBNC3A","created_at":"2026-07-03T00:16:50.232122+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/3LTBNC3AN6KEBJJOW64G4RICZG","json":"https://pith.science/pith/3LTBNC3AN6KEBJJOW64G4RICZG.json","graph_json":"https://pith.science/api/pith-number/3LTBNC3AN6KEBJJOW64G4RICZG/graph.json","events_json":"https://pith.science/api/pith-number/3LTBNC3AN6KEBJJOW64G4RICZG/events.json","paper":"https://pith.science/paper/3LTBNC3A"},"agent_actions":{"view_html":"https://pith.science/pith/3LTBNC3AN6KEBJJOW64G4RICZG","download_json":"https://pith.science/pith/3LTBNC3AN6KEBJJOW64G4RICZG.json","view_paper":"https://pith.science/paper/3LTBNC3A","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2508.05953&json=true","fetch_graph":"https://pith.science/api/pith-number/3LTBNC3AN6KEBJJOW64G4RICZG/graph.json","fetch_events":"https://pith.science/api/pith-number/3LTBNC3AN6KEBJJOW64G4RICZG/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/3LTBNC3AN6KEBJJOW64G4RICZG/action/timestamp_anchor","attest_storage":"https://pith.science/pith/3LTBNC3AN6KEBJJOW64G4RICZG/action/storage_attestation","attest_author":"https://pith.science/pith/3LTBNC3AN6KEBJJOW64G4RICZG/action/author_attestation","sign_citation":"https://pith.science/pith/3LTBNC3AN6KEBJJOW64G4RICZG/action/citation_signature","submit_replication":"https://pith.science/pith/3LTBNC3AN6KEBJJOW64G4RICZG/action/replication_record"}},"created_at":"2026-07-03T00:16:50.232122+00:00","updated_at":"2026-07-03T00:16:50.232122+00:00"}