{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:SGZZBRKMMGFO2MM6FGCFYQJBBB","short_pith_number":"pith:SGZZBRKM","canonical_record":{"source":{"id":"2606.26797","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-06-25T09:32:58Z","cross_cats_sorted":[],"title_canon_sha256":"e57c0db14b96d8fbb0623884b9e257a9a0cef81f074a99592926650fe955d2d7","abstract_canon_sha256":"d3ab7aaa18f11038c3d68610c9dde78e0df9a5ca657c9e5cead0d1113d4f6e18"},"schema_version":"1.0"},"canonical_sha256":"91b390c54c618aed319e29845c4121084ec6fd0b5654a5dcc5d80b4ee156f2f6","source":{"kind":"arxiv","id":"2606.26797","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2606.26797","created_at":"2026-06-26T01:16:00Z"},{"alias_kind":"arxiv_version","alias_value":"2606.26797v1","created_at":"2026-06-26T01:16:00Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.26797","created_at":"2026-06-26T01:16:00Z"},{"alias_kind":"pith_short_12","alias_value":"SGZZBRKMMGFO","created_at":"2026-06-26T01:16:00Z"},{"alias_kind":"pith_short_16","alias_value":"SGZZBRKMMGFO2MM6","created_at":"2026-06-26T01:16:00Z"},{"alias_kind":"pith_short_8","alias_value":"SGZZBRKM","created_at":"2026-06-26T01:16:00Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:SGZZBRKMMGFO2MM6FGCFYQJBBB","target":"record","payload":{"canonical_record":{"source":{"id":"2606.26797","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-06-25T09:32:58Z","cross_cats_sorted":[],"title_canon_sha256":"e57c0db14b96d8fbb0623884b9e257a9a0cef81f074a99592926650fe955d2d7","abstract_canon_sha256":"d3ab7aaa18f11038c3d68610c9dde78e0df9a5ca657c9e5cead0d1113d4f6e18"},"schema_version":"1.0"},"canonical_sha256":"91b390c54c618aed319e29845c4121084ec6fd0b5654a5dcc5d80b4ee156f2f6","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-26T01:16:00.015964Z","signature_b64":"yNfNSi3dSRKRl4ojnE9MacoJBIOV6CQkoY/MEdG6a2EHM7OzUQwRMWlVL5db2/g6oln3aOeaUJQswyHyD6QdDA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"91b390c54c618aed319e29845c4121084ec6fd0b5654a5dcc5d80b4ee156f2f6","last_reissued_at":"2026-06-26T01:16:00.015582Z","signature_status":"signed_v1","first_computed_at":"2026-06-26T01:16:00.015582Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2606.26797","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-26T01:16:00Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"EBvNVUSlbrLyX0KcEDGRewtLznSKHqMyZNTquPK+gBHlbXje3G7QlIA4DVF2shKB2W6E157u3Vq2jzbj2+d+Dw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-30T15:15:17.901511Z"},"content_sha256":"12e29b5a41872628099b467f7a2d22906f033d2401b679ee36539e2a386dd0ab","schema_version":"1.0","event_id":"sha256:12e29b5a41872628099b467f7a2d22906f033d2401b679ee36539e2a386dd0ab"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:SGZZBRKMMGFO2MM6FGCFYQJBBB","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Reasoning Quality Emerges Early: Data Curation for Reasoning Models","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.LG","authors_text":"Baharan Mirzasoleiman, Carlos Morato, Hongyi Henry Jin, Meysam Ghaffari, Wenhan Yang","submitted_at":"2026-06-25T09:32:58Z","abstract_excerpt":"Supervised fine-tuning (SFT) on a small, high-quality set of long reasoning traces is an effective approach for eliciting strong reasoning capabilities in Large Language Models (LLMs). However, existing methods for curating high-quality SFT data rely heavily on strong reasoning models to filter examples based on diversity and difficulty, making the curation process costly while often yielding suboptimal data quality. In this work, we show that diverse and challenging reasoning examples can be identified using only the initial reasoning tokens. Specifically, we demonstrate that difficult proble"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.26797","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2606.26797/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-26T01:16:00Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"w3M4NqANg63fxMaHPHPvSmTq3fIorzlk9Ops/pOxx6+7jgPfSimCcZQq7go8mlqDU0xx7c0f3Pu8tXZrpdKgBA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-30T15:15:17.901927Z"},"content_sha256":"dcaf204b224a52b4aa37eccd8075f1163432cfdad0ccfdb244f96258041d7102","schema_version":"1.0","event_id":"sha256:dcaf204b224a52b4aa37eccd8075f1163432cfdad0ccfdb244f96258041d7102"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/SGZZBRKMMGFO2MM6FGCFYQJBBB/bundle.json","state_url":"https://pith.science/pith/SGZZBRKMMGFO2MM6FGCFYQJBBB/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/SGZZBRKMMGFO2MM6FGCFYQJBBB/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-30T15:15:17Z","links":{"resolver":"https://pith.science/pith/SGZZBRKMMGFO2MM6FGCFYQJBBB","bundle":"https://pith.science/pith/SGZZBRKMMGFO2MM6FGCFYQJBBB/bundle.json","state":"https://pith.science/pith/SGZZBRKMMGFO2MM6FGCFYQJBBB/state.json","well_known_bundle":"https://pith.science/.well-known/pith/SGZZBRKMMGFO2MM6FGCFYQJBBB/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:SGZZBRKMMGFO2MM6FGCFYQJBBB","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"d3ab7aaa18f11038c3d68610c9dde78e0df9a5ca657c9e5cead0d1113d4f6e18","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-06-25T09:32:58Z","title_canon_sha256":"e57c0db14b96d8fbb0623884b9e257a9a0cef81f074a99592926650fe955d2d7"},"schema_version":"1.0","source":{"id":"2606.26797","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2606.26797","created_at":"2026-06-26T01:16:00Z"},{"alias_kind":"arxiv_version","alias_value":"2606.26797v1","created_at":"2026-06-26T01:16:00Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.26797","created_at":"2026-06-26T01:16:00Z"},{"alias_kind":"pith_short_12","alias_value":"SGZZBRKMMGFO","created_at":"2026-06-26T01:16:00Z"},{"alias_kind":"pith_short_16","alias_value":"SGZZBRKMMGFO2MM6","created_at":"2026-06-26T01:16:00Z"},{"alias_kind":"pith_short_8","alias_value":"SGZZBRKM","created_at":"2026-06-26T01:16:00Z"}],"graph_snapshots":[{"event_id":"sha256:dcaf204b224a52b4aa37eccd8075f1163432cfdad0ccfdb244f96258041d7102","target":"graph","created_at":"2026-06-26T01:16:00Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2606.26797/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Supervised fine-tuning (SFT) on a small, high-quality set of long reasoning traces is an effective approach for eliciting strong reasoning capabilities in Large Language Models (LLMs). However, existing methods for curating high-quality SFT data rely heavily on strong reasoning models to filter examples based on diversity and difficulty, making the curation process costly while often yielding suboptimal data quality. In this work, we show that diverse and challenging reasoning examples can be identified using only the initial reasoning tokens. Specifically, we demonstrate that difficult proble","authors_text":"Baharan Mirzasoleiman, Carlos Morato, Hongyi Henry Jin, Meysam Ghaffari, Wenhan Yang","cross_cats":[],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-06-25T09:32:58Z","title":"Reasoning Quality Emerges Early: Data Curation for Reasoning Models"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.26797","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:12e29b5a41872628099b467f7a2d22906f033d2401b679ee36539e2a386dd0ab","target":"record","created_at":"2026-06-26T01:16:00Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"d3ab7aaa18f11038c3d68610c9dde78e0df9a5ca657c9e5cead0d1113d4f6e18","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-06-25T09:32:58Z","title_canon_sha256":"e57c0db14b96d8fbb0623884b9e257a9a0cef81f074a99592926650fe955d2d7"},"schema_version":"1.0","source":{"id":"2606.26797","kind":"arxiv","version":1}},"canonical_sha256":"91b390c54c618aed319e29845c4121084ec6fd0b5654a5dcc5d80b4ee156f2f6","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"91b390c54c618aed319e29845c4121084ec6fd0b5654a5dcc5d80b4ee156f2f6","first_computed_at":"2026-06-26T01:16:00.015582Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-06-26T01:16:00.015582Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"yNfNSi3dSRKRl4ojnE9MacoJBIOV6CQkoY/MEdG6a2EHM7OzUQwRMWlVL5db2/g6oln3aOeaUJQswyHyD6QdDA==","signature_status":"signed_v1","signed_at":"2026-06-26T01:16:00.015964Z","signed_message":"canonical_sha256_bytes"},"source_id":"2606.26797","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:12e29b5a41872628099b467f7a2d22906f033d2401b679ee36539e2a386dd0ab","sha256:dcaf204b224a52b4aa37eccd8075f1163432cfdad0ccfdb244f96258041d7102"],"state_sha256":"6a912acddc62af465a65942963310aa0a9172052cea2ab8b6ce0a1ac1330df5a"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"c7KhU9QuqeTAqcftMqm+nQmJMEssgTeDOP9Z13G25udWEaFefZg37C7w96P4FjdlAmiiBK1sDsbEepmNQaImDg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-30T15:15:17.906717Z","bundle_sha256":"51f8751a7883903c4e13f1c932be3625763e601419740c6e5e1d064a3eb8a70e"}}