{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:PTD65LSX2M2R3BXULUJO5Q7SXE","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"c522a13c994253ecb45d4197889cc7582ba966311a168f6695e21dd6d4d9178e","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-05-13T19:00:57Z","title_canon_sha256":"eaf2659cc4471db9b1210af5f89624c2126d08882e71808f45882da09876f34d"},"schema_version":"1.0","source":{"id":"2605.14040","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.14040","created_at":"2026-05-17T23:39:12Z"},{"alias_kind":"arxiv_version","alias_value":"2605.14040v1","created_at":"2026-05-17T23:39:12Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.14040","created_at":"2026-05-17T23:39:12Z"},{"alias_kind":"pith_short_12","alias_value":"PTD65LSX2M2R","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_16","alias_value":"PTD65LSX2M2R3BXU","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_8","alias_value":"PTD65LSX","created_at":"2026-05-18T12:33:37Z"}],"graph_snapshots":[{"event_id":"sha256:d63c42c238bb028564c1167acd282caafa5efa3cbb4fff40dd750047d857664f","target":"graph","created_at":"2026-05-17T23:39:12Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"Across 3 seeds, Physics-R1 lifts the audited corpus over the 8B base by +18.3 pp on PhysOlym-A liberal (8.0 -> 26.3 +/- 1.7; 7.1 pp behind Sonnet 4.5), +15.7 pp on PhysReason (23.9 -> 39.6 +/- 6.4; ahead of Qwen3-VL-32B and Gemini 2.5 Pro), +6.9 pp on OlympiadBench-Physics, and +4.1 pp on PhyX MCQ."},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"The three-stage audit (5-gram Jaccard then embedding cosine then LLM judge) has removed essentially all contamination and the new PhysOlym-A set is truly held-out with no overlap to any training data used for the base model or the recipe."},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"Audited olympiad corpus and Physics-R1 recipe improve 8B VLM by up to 18 points on held-out physics problems while exposing contamination in prior evals."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"Audited olympiad corpus and RL recipe lift 8B vision model 18 points on physics reasoning."}],"snapshot_sha256":"c5fd5b8c73bddcbe9b3eb47395f15bcd82633177951a1ca522bc5b9dd052d6e5"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"We audit the multimodal-physics evaluation pipeline end-to-end and document three undetected construction practices that distort how the field measures vision-language reasoning: train-eval contamination, translation drift, and MCQ saturation. (1) Public training pools (UGPhysics-Train, SciInstruct, MMK12) pass single-stage 5-gram-Jaccard audits with zero hits across all six public physics evals; a three-stage audit (Jaccard -> mxbai-embed-large cosine -> Haiku-4.5 LLM-judge) surfaces 134 near-duplicates and 4,846 paraphrase candidates in SciInstruct alone. (2) A 17-pp Sonnet 4.5 delta on 59 p","authors_text":"Shan Yang","cross_cats":[],"headline":"Audited olympiad corpus and RL recipe lift 8B vision model 18 points on physics reasoning.","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-05-13T19:00:57Z","title":"Physics-R1: An Audited Olympiad Corpus and Recipe for Visual Physics Reasoning"},"references":{"count":52,"internal_anchors":3,"resolved_work":52,"sample":[{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":1,"title":"Shen, Hui and Wu, Taiqiang and Han, Qi and others , journal=","work_id":"63d35c63-43bc-45e0-91e4-2f7ad0538808","year":null},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":2,"title":"He, Chaoqun and Luo, Renjie and Bai, Yuzhuo and Hu, Shengding and others , booktitle=","work_id":"162e041c-7707-46b5-8ca1-ed10e4fc72ba","year":null},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":3,"title":"Xu, Xin and Xu, Qiyun and Xiao, Tong and others , journal=","work_id":"54060fa6-ed23-41ac-ba14-f825a38a45af","year":null},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":4,"title":"Zhang, Xinyu and Dong, Yuxuan and Wu, Yanrui and others , journal=","work_id":"3c778b29-142e-47e5-8b61-f7a98dc895e4","year":null},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":5,"title":"Yue, Xiang and Ni, Yuansheng and Zhang, Kai and Zheng, Tianyu and others , booktitle=","work_id":"fab91dec-8cbe-497b-b77d-7aed4a1876fe","year":null}],"snapshot_sha256":"816bb11743460de72db65b69c88cb31f068311902f186d88feb64c99c911120e"},"source":{"id":"2605.14040","kind":"arxiv","version":1},"verdict":{"created_at":"2026-05-15T05:31:35.296432Z","id":"0fae45ee-4962-4eb8-87d6-8681e27ec0a0","model_set":{"reader":"grok-4.3"},"one_line_summary":"Audited olympiad corpus and Physics-R1 recipe improve 8B VLM by up to 18 points on held-out physics problems while exposing contamination in prior evals.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"Audited olympiad corpus and RL recipe lift 8B vision model 18 points on physics reasoning.","strongest_claim":"Across 3 seeds, Physics-R1 lifts the audited corpus over the 8B base by +18.3 pp on PhysOlym-A liberal (8.0 -> 26.3 +/- 1.7; 7.1 pp behind Sonnet 4.5), +15.7 pp on PhysReason (23.9 -> 39.6 +/- 6.4; ahead of Qwen3-VL-32B and Gemini 2.5 Pro), +6.9 pp on OlympiadBench-Physics, and +4.1 pp on PhyX MCQ.","weakest_assumption":"The three-stage audit (5-gram Jaccard then embedding cosine then LLM judge) has removed essentially all contamination and the new PhysOlym-A set is truly held-out with no overlap to any training data used for the base model or the recipe."}},"verdict_id":"0fae45ee-4962-4eb8-87d6-8681e27ec0a0"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:8af9eee29c63af2351a9905375681a6f6c4ba25c74c5058244b52d640dc35f85","target":"record","created_at":"2026-05-17T23:39:12Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"c522a13c994253ecb45d4197889cc7582ba966311a168f6695e21dd6d4d9178e","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-05-13T19:00:57Z","title_canon_sha256":"eaf2659cc4471db9b1210af5f89624c2126d08882e71808f45882da09876f34d"},"schema_version":"1.0","source":{"id":"2605.14040","kind":"arxiv","version":1}},"canonical_sha256":"7cc7eeae57d3351d86f45d12eec3f2b9364d8962e2c0ac6816fa47a93ea0833d","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"7cc7eeae57d3351d86f45d12eec3f2b9364d8962e2c0ac6816fa47a93ea0833d","first_computed_at":"2026-05-17T23:39:12.752032Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:39:12.752032Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"NahVIp2geAhbewDfWAPPduXyyxdgNWVdY9VrBVSBvOBMyxAYj+58inpI4XQuzxTa6+knDv6qMnqgGO9RzVL5Aw==","signature_status":"signed_v1","signed_at":"2026-05-17T23:39:12.752551Z","signed_message":"canonical_sha256_bytes"},"source_id":"2605.14040","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:8af9eee29c63af2351a9905375681a6f6c4ba25c74c5058244b52d640dc35f85","sha256:d63c42c238bb028564c1167acd282caafa5efa3cbb4fff40dd750047d857664f"],"state_sha256":"0f9f941ead6b05d27796a63237bad099c85a1291d84eb4d378a5a93a65338c3b"}