{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:AZEKMGBIPNRV2MDZOLW56UV3WS","short_pith_number":"pith:AZEKMGBI","canonical_record":{"source":{"id":"2605.07711","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-05-08T13:16:17Z","cross_cats_sorted":[],"title_canon_sha256":"e1c7ac2f36aebf3004462e603b6abae35161af6e3f38eab2ec516746969009cf","abstract_canon_sha256":"3f800750b512ccffc2eb8f1ace616347f65e3ae916b159cf2941e6db874ba8c1"},"schema_version":"1.0"},"canonical_sha256":"0648a618287b635d307972eddf52bbb48f30b8d2b472785c7a6cb012d0d1440e","source":{"kind":"arxiv","id":"2605.07711","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.07711","created_at":"2026-05-22T01:04:05Z"},{"alias_kind":"arxiv_version","alias_value":"2605.07711v2","created_at":"2026-05-22T01:04:05Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.07711","created_at":"2026-05-22T01:04:05Z"},{"alias_kind":"pith_short_12","alias_value":"AZEKMGBIPNRV","created_at":"2026-05-22T01:04:05Z"},{"alias_kind":"pith_short_16","alias_value":"AZEKMGBIPNRV2MDZ","created_at":"2026-05-22T01:04:05Z"},{"alias_kind":"pith_short_8","alias_value":"AZEKMGBI","created_at":"2026-05-22T01:04:05Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:AZEKMGBIPNRV2MDZOLW56UV3WS","target":"record","payload":{"canonical_record":{"source":{"id":"2605.07711","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-05-08T13:16:17Z","cross_cats_sorted":[],"title_canon_sha256":"e1c7ac2f36aebf3004462e603b6abae35161af6e3f38eab2ec516746969009cf","abstract_canon_sha256":"3f800750b512ccffc2eb8f1ace616347f65e3ae916b159cf2941e6db874ba8c1"},"schema_version":"1.0"},"canonical_sha256":"0648a618287b635d307972eddf52bbb48f30b8d2b472785c7a6cb012d0d1440e","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-22T01:04:05.429458Z","signature_b64":"cBe1xinzj73V0aGWo4LemLK4qaD5UyJbHDk2wsuHljrO1SiWYPGwUGrri6cS8VaAheJ8O9be7f6+aNwNXAh3Bw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"0648a618287b635d307972eddf52bbb48f30b8d2b472785c7a6cb012d0d1440e","last_reissued_at":"2026-05-22T01:04:05.428943Z","signature_status":"signed_v1","first_computed_at":"2026-05-22T01:04:05.428943Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2605.07711","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-22T01:04:05Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"H78QiQ2wsLAPAO3Gd1zS1VILgaSEIJE/c0s1ZBP4+sOmcX4HeS6fuzWyW7HiDz7rVJEEhKYQWcKi70mR5AELDg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-31T16:10:38.986953Z"},"content_sha256":"ac41b38b20cd5c11fe98b574aa71ba8a7d3724f9c3534f15f275bbb5e2478d82","schema_version":"1.0","event_id":"sha256:ac41b38b20cd5c11fe98b574aa71ba8a7d3724f9c3534f15f275bbb5e2478d82"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:AZEKMGBIPNRV2MDZOLW56UV3WS","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"SimCT: Recovering Lost Supervision for Cross-Tokenizer On-Policy Distillation","license":"http://creativecommons.org/licenses/by/4.0/","headline":"SimCT restores supervision lost under heterogeneous tokenizers by supervising over short multi-token continuations in on-policy distillation.","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Bichuan Feng, Jie Sun, Junfeng Fang, Mao Zheng, Mingyang Song, Pengfei Liu, Qiyong Zhong, Xiang Wang, Yilin Cheng","submitted_at":"2026-05-08T13:16:17Z","abstract_excerpt":"On-policy distillation (OPD) is a standard tool for transferring teacher behavior to a smaller student, but it implicitly assumes that teacher and student predictions are comparable token by token, an assumption that fails whenever the two models tokenize the same text differently. Under heterogeneous tokenizers, exact shared-token matching silently discards a large fraction of the teacher signal at precisely the positions where vocabularies disagree. We propose \\textbf{\\underline{Sim}ple \\underline{C}ross-\\underline{T}okenizer OPD (SimCT)}, which restores this signal by enlarging the supervis"},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"We show that these units are the finest jointly tokenizable supervision interface, and that coarser alternatives remove teacher-student distinctions that are useful for on-policy learning. Across three heterogeneous teacher-student pairs on mathematical reasoning and code-generation benchmarks, SimCT shows consistent gains over shared-vocabulary OPD and representative cross-tokenizer baselines, with ablations confirming that the improvements come from recovering supervision discarded by exact shared-token matching.","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"That short multi-token continuations supply teacher-student distinctions that are both useful for on-policy learning and free of misalignment noise that would degrade the student.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"SimCT recovers discarded teacher signal in cross-tokenizer on-policy distillation by enlarging supervision to jointly realizable multi-token continuations, yielding consistent gains on math reasoning and code generation tasks.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"SimCT restores supervision lost under heterogeneous tokenizers by supervising over short multi-token continuations in on-policy distillation.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"fb13b849b4be3efa30d123d0558e5ebf91178ee47bca4a283f766d05b8e4b048"},"source":{"id":"2605.07711","kind":"arxiv","version":2},"verdict":{"id":"fc41bfac-283a-48e0-afbc-4a301fed72fb","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-11T02:57:24.265305Z","strongest_claim":"We show that these units are the finest jointly tokenizable supervision interface, and that coarser alternatives remove teacher-student distinctions that are useful for on-policy learning. Across three heterogeneous teacher-student pairs on mathematical reasoning and code-generation benchmarks, SimCT shows consistent gains over shared-vocabulary OPD and representative cross-tokenizer baselines, with ablations confirming that the improvements come from recovering supervision discarded by exact shared-token matching.","one_line_summary":"SimCT recovers discarded teacher signal in cross-tokenizer on-policy distillation by enlarging supervision to jointly realizable multi-token continuations, yielding consistent gains on math reasoning and code generation tasks.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"That short multi-token continuations supply teacher-student distinctions that are both useful for on-policy learning and free of misalignment noise that would degrade the student.","pith_extraction_headline":"SimCT restores supervision lost under heterogeneous tokenizers by supervising over short multi-token continuations in on-policy distillation."},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2605.07711/integrity.json","findings":[],"available":true,"detectors_run":[{"name":"claim_evidence","ran_at":"2026-05-20T10:22:02.832441Z","status":"completed","version":"1.0.0","findings_count":0},{"name":"ai_meta_artifact","ran_at":"2026-05-20T05:36:06.044701Z","status":"completed","version":"1.0.0","findings_count":0},{"name":"doi_title_agreement","ran_at":"2026-05-19T16:01:18.531801Z","status":"completed","version":"1.0.0","findings_count":0},{"name":"doi_compliance","ran_at":"2026-05-19T11:35:56.986881Z","status":"completed","version":"1.0.0","findings_count":0}],"snapshot_sha256":"ed28da327c275c3bfe59227a1c5168d9c02a23c9671bd43aed06705cc088a100"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":"fc41bfac-283a-48e0-afbc-4a301fed72fb"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-22T01:04:05Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"JCpu4J5LZIur3xM3pOUyLiyWEtO0MiZ/MSlq8iSfOjbTYnyABe4y9STNTRpUfEuD2vOD2z374Q7lbDlEkpXTBg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-31T16:10:38.988070Z"},"content_sha256":"38547715a82d4d04281dab678bcc7e09fe3ab5528196a70485b9b3b0c64202d0","schema_version":"1.0","event_id":"sha256:38547715a82d4d04281dab678bcc7e09fe3ab5528196a70485b9b3b0c64202d0"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/AZEKMGBIPNRV2MDZOLW56UV3WS/bundle.json","state_url":"https://pith.science/pith/AZEKMGBIPNRV2MDZOLW56UV3WS/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/AZEKMGBIPNRV2MDZOLW56UV3WS/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-31T16:10:38Z","links":{"resolver":"https://pith.science/pith/AZEKMGBIPNRV2MDZOLW56UV3WS","bundle":"https://pith.science/pith/AZEKMGBIPNRV2MDZOLW56UV3WS/bundle.json","state":"https://pith.science/pith/AZEKMGBIPNRV2MDZOLW56UV3WS/state.json","well_known_bundle":"https://pith.science/.well-known/pith/AZEKMGBIPNRV2MDZOLW56UV3WS/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:AZEKMGBIPNRV2MDZOLW56UV3WS","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"3f800750b512ccffc2eb8f1ace616347f65e3ae916b159cf2941e6db874ba8c1","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-05-08T13:16:17Z","title_canon_sha256":"e1c7ac2f36aebf3004462e603b6abae35161af6e3f38eab2ec516746969009cf"},"schema_version":"1.0","source":{"id":"2605.07711","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.07711","created_at":"2026-05-22T01:04:05Z"},{"alias_kind":"arxiv_version","alias_value":"2605.07711v2","created_at":"2026-05-22T01:04:05Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.07711","created_at":"2026-05-22T01:04:05Z"},{"alias_kind":"pith_short_12","alias_value":"AZEKMGBIPNRV","created_at":"2026-05-22T01:04:05Z"},{"alias_kind":"pith_short_16","alias_value":"AZEKMGBIPNRV2MDZ","created_at":"2026-05-22T01:04:05Z"},{"alias_kind":"pith_short_8","alias_value":"AZEKMGBI","created_at":"2026-05-22T01:04:05Z"}],"graph_snapshots":[{"event_id":"sha256:38547715a82d4d04281dab678bcc7e09fe3ab5528196a70485b9b3b0c64202d0","target":"graph","created_at":"2026-05-22T01:04:05Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"We show that these units are the finest jointly tokenizable supervision interface, and that coarser alternatives remove teacher-student distinctions that are useful for on-policy learning. Across three heterogeneous teacher-student pairs on mathematical reasoning and code-generation benchmarks, SimCT shows consistent gains over shared-vocabulary OPD and representative cross-tokenizer baselines, with ablations confirming that the improvements come from recovering supervision discarded by exact shared-token matching."},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"That short multi-token continuations supply teacher-student distinctions that are both useful for on-policy learning and free of misalignment noise that would degrade the student."},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"SimCT recovers discarded teacher signal in cross-tokenizer on-policy distillation by enlarging supervision to jointly realizable multi-token continuations, yielding consistent gains on math reasoning and code generation tasks."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"SimCT restores supervision lost under heterogeneous tokenizers by supervising over short multi-token continuations in on-policy distillation."}],"snapshot_sha256":"fb13b849b4be3efa30d123d0558e5ebf91178ee47bca4a283f766d05b8e4b048"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[{"findings_count":0,"name":"claim_evidence","ran_at":"2026-05-20T10:22:02.832441Z","status":"completed","version":"1.0.0"},{"findings_count":0,"name":"ai_meta_artifact","ran_at":"2026-05-20T05:36:06.044701Z","status":"completed","version":"1.0.0"},{"findings_count":0,"name":"doi_title_agreement","ran_at":"2026-05-19T16:01:18.531801Z","status":"completed","version":"1.0.0"},{"findings_count":0,"name":"doi_compliance","ran_at":"2026-05-19T11:35:56.986881Z","status":"completed","version":"1.0.0"}],"endpoint":"/pith/2605.07711/integrity.json","findings":[],"snapshot_sha256":"ed28da327c275c3bfe59227a1c5168d9c02a23c9671bd43aed06705cc088a100","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"On-policy distillation (OPD) is a standard tool for transferring teacher behavior to a smaller student, but it implicitly assumes that teacher and student predictions are comparable token by token, an assumption that fails whenever the two models tokenize the same text differently. Under heterogeneous tokenizers, exact shared-token matching silently discards a large fraction of the teacher signal at precisely the positions where vocabularies disagree. We propose \\textbf{\\underline{Sim}ple \\underline{C}ross-\\underline{T}okenizer OPD (SimCT)}, which restores this signal by enlarging the supervis","authors_text":"Bichuan Feng, Jie Sun, Junfeng Fang, Mao Zheng, Mingyang Song, Pengfei Liu, Qiyong Zhong, Xiang Wang, Yilin Cheng","cross_cats":[],"headline":"SimCT restores supervision lost under heterogeneous tokenizers by supervising over short multi-token continuations in on-policy distillation.","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-05-08T13:16:17Z","title":"SimCT: Recovering Lost Supervision for Cross-Tokenizer On-Policy Distillation"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.07711","kind":"arxiv","version":2},"verdict":{"created_at":"2026-05-11T02:57:24.265305Z","id":"fc41bfac-283a-48e0-afbc-4a301fed72fb","model_set":{"reader":"grok-4.3"},"one_line_summary":"SimCT recovers discarded teacher signal in cross-tokenizer on-policy distillation by enlarging supervision to jointly realizable multi-token continuations, yielding consistent gains on math reasoning and code generation tasks.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"SimCT restores supervision lost under heterogeneous tokenizers by supervising over short multi-token continuations in on-policy distillation.","strongest_claim":"We show that these units are the finest jointly tokenizable supervision interface, and that coarser alternatives remove teacher-student distinctions that are useful for on-policy learning. Across three heterogeneous teacher-student pairs on mathematical reasoning and code-generation benchmarks, SimCT shows consistent gains over shared-vocabulary OPD and representative cross-tokenizer baselines, with ablations confirming that the improvements come from recovering supervision discarded by exact shared-token matching.","weakest_assumption":"That short multi-token continuations supply teacher-student distinctions that are both useful for on-policy learning and free of misalignment noise that would degrade the student."}},"verdict_id":"fc41bfac-283a-48e0-afbc-4a301fed72fb"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:ac41b38b20cd5c11fe98b574aa71ba8a7d3724f9c3534f15f275bbb5e2478d82","target":"record","created_at":"2026-05-22T01:04:05Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"3f800750b512ccffc2eb8f1ace616347f65e3ae916b159cf2941e6db874ba8c1","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-05-08T13:16:17Z","title_canon_sha256":"e1c7ac2f36aebf3004462e603b6abae35161af6e3f38eab2ec516746969009cf"},"schema_version":"1.0","source":{"id":"2605.07711","kind":"arxiv","version":2}},"canonical_sha256":"0648a618287b635d307972eddf52bbb48f30b8d2b472785c7a6cb012d0d1440e","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"0648a618287b635d307972eddf52bbb48f30b8d2b472785c7a6cb012d0d1440e","first_computed_at":"2026-05-22T01:04:05.428943Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-22T01:04:05.428943Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"cBe1xinzj73V0aGWo4LemLK4qaD5UyJbHDk2wsuHljrO1SiWYPGwUGrri6cS8VaAheJ8O9be7f6+aNwNXAh3Bw==","signature_status":"signed_v1","signed_at":"2026-05-22T01:04:05.429458Z","signed_message":"canonical_sha256_bytes"},"source_id":"2605.07711","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:ac41b38b20cd5c11fe98b574aa71ba8a7d3724f9c3534f15f275bbb5e2478d82","sha256:38547715a82d4d04281dab678bcc7e09fe3ab5528196a70485b9b3b0c64202d0"],"state_sha256":"7a3f5a89ebf279d1a52434a74336560dbbdadafb1a5baa3660c9177131a8ba11"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"pm+aVvNuqeum6/W7UXugGsl6l+DgkClRzFuQTOYnYUqlfkKmVaHkQb388ooUrz774QiguQ3HTLP1Xn377o0xCQ==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-31T16:10:38.993035Z","bundle_sha256":"6658a03bef4621f4eb3f2edfcbbcb329577b71c65d096342775c8e6eb1411807"}}