{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:C4JEF5LKFDAKQZ4BC3QKRUWXBO","short_pith_number":"pith:C4JEF5LK","canonical_record":{"source":{"id":"2606.28166","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by-sa/4.0/","primary_cat":"cs.AI","submitted_at":"2026-06-26T15:00:35Z","cross_cats_sorted":[],"title_canon_sha256":"bb029d7c64cd8103778c36e68184be1ad6d6dca4452fd6b84c820895359c86b9","abstract_canon_sha256":"2c3125c50cdf4ddadbb80bcde49db9af80f16a1da711da0a3bb3b282b7c6e9b3"},"schema_version":"1.0"},"canonical_sha256":"171242f56a28c0a8678116e0a8d2d70ba2c193c6324d41b1e424b2218fd71c2e","source":{"kind":"arxiv","id":"2606.28166","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2606.28166","created_at":"2026-06-29T01:15:07Z"},{"alias_kind":"arxiv_version","alias_value":"2606.28166v1","created_at":"2026-06-29T01:15:07Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.28166","created_at":"2026-06-29T01:15:07Z"},{"alias_kind":"pith_short_12","alias_value":"C4JEF5LKFDAK","created_at":"2026-06-29T01:15:07Z"},{"alias_kind":"pith_short_16","alias_value":"C4JEF5LKFDAKQZ4B","created_at":"2026-06-29T01:15:07Z"},{"alias_kind":"pith_short_8","alias_value":"C4JEF5LK","created_at":"2026-06-29T01:15:07Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:C4JEF5LKFDAKQZ4BC3QKRUWXBO","target":"record","payload":{"canonical_record":{"source":{"id":"2606.28166","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by-sa/4.0/","primary_cat":"cs.AI","submitted_at":"2026-06-26T15:00:35Z","cross_cats_sorted":[],"title_canon_sha256":"bb029d7c64cd8103778c36e68184be1ad6d6dca4452fd6b84c820895359c86b9","abstract_canon_sha256":"2c3125c50cdf4ddadbb80bcde49db9af80f16a1da711da0a3bb3b282b7c6e9b3"},"schema_version":"1.0"},"canonical_sha256":"171242f56a28c0a8678116e0a8d2d70ba2c193c6324d41b1e424b2218fd71c2e","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-29T01:15:07.961223Z","signature_b64":"pe/NZIK3uOHR+gjoBwkfrGmIn/p2n7vuTeWBt8h+7OMlF/1qJ+DDZATAML2WbcML34FqMqXtY++YqeQf7EAuCA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"171242f56a28c0a8678116e0a8d2d70ba2c193c6324d41b1e424b2218fd71c2e","last_reissued_at":"2026-06-29T01:15:07.960781Z","signature_status":"signed_v1","first_computed_at":"2026-06-29T01:15:07.960781Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2606.28166","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-29T01:15:07Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"jPq7KRpGtHnUOX045wYLZYnpAgDjl6yza+cES0cbIEIZGWV98yeNaHggKE/uIZ98TWREHLEP9PkhdPVG2sJWBA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-07-02T01:31:20.131988Z"},"content_sha256":"fd5cd59440a68cc353da4793d7923a8d50b30afad6f1aa12061f1d6e978b2f4f","schema_version":"1.0","event_id":"sha256:fd5cd59440a68cc353da4793d7923a8d50b30afad6f1aa12061f1d6e978b2f4f"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:C4JEF5LKFDAKQZ4BC3QKRUWXBO","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Tandem Reinforcement Learning with Verifiable Rewards","license":"http://creativecommons.org/licenses/by-sa/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.AI","authors_text":"Ashton Anderson, Difan Jiao, Raghav Singhal, Robert West","submitted_at":"2026-06-26T15:00:35Z","abstract_excerpt":"Reinforcement learning with verifiable rewards (RLVR) has significantly improved the reasoning capability of large language models, reaching expert or even superhuman performance in domains such as competition math. However, whether weaker agents and humans can actually harness this capability is far less certain, with RLVR documented to drift reasoning toward idiosyncratic patterns such as poor readability and language mixing. Tandem training is a recently introduced paradigm that targets this compatibility problem: a trained, stronger senior co-generates each rollout with a frozen, weaker ju"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.28166","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2606.28166/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-29T01:15:07Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"QjsyA1H4VAaopx70xXpSVK+GqlnY0j/5wN0Na8R0d3U1pgf4cGpOL+/9qNOo2DPuhUtuvtrA7tpH0x6kFM5SAg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-07-02T01:31:20.132655Z"},"content_sha256":"6c84c5298c3c33b738d0b5ba7347dc0d80647de8a3c1e228c64f47a4ee84d7f5","schema_version":"1.0","event_id":"sha256:6c84c5298c3c33b738d0b5ba7347dc0d80647de8a3c1e228c64f47a4ee84d7f5"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/C4JEF5LKFDAKQZ4BC3QKRUWXBO/bundle.json","state_url":"https://pith.science/pith/C4JEF5LKFDAKQZ4BC3QKRUWXBO/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/C4JEF5LKFDAKQZ4BC3QKRUWXBO/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-07-02T01:31:20Z","links":{"resolver":"https://pith.science/pith/C4JEF5LKFDAKQZ4BC3QKRUWXBO","bundle":"https://pith.science/pith/C4JEF5LKFDAKQZ4BC3QKRUWXBO/bundle.json","state":"https://pith.science/pith/C4JEF5LKFDAKQZ4BC3QKRUWXBO/state.json","well_known_bundle":"https://pith.science/.well-known/pith/C4JEF5LKFDAKQZ4BC3QKRUWXBO/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:C4JEF5LKFDAKQZ4BC3QKRUWXBO","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"2c3125c50cdf4ddadbb80bcde49db9af80f16a1da711da0a3bb3b282b7c6e9b3","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by-sa/4.0/","primary_cat":"cs.AI","submitted_at":"2026-06-26T15:00:35Z","title_canon_sha256":"bb029d7c64cd8103778c36e68184be1ad6d6dca4452fd6b84c820895359c86b9"},"schema_version":"1.0","source":{"id":"2606.28166","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2606.28166","created_at":"2026-06-29T01:15:07Z"},{"alias_kind":"arxiv_version","alias_value":"2606.28166v1","created_at":"2026-06-29T01:15:07Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.28166","created_at":"2026-06-29T01:15:07Z"},{"alias_kind":"pith_short_12","alias_value":"C4JEF5LKFDAK","created_at":"2026-06-29T01:15:07Z"},{"alias_kind":"pith_short_16","alias_value":"C4JEF5LKFDAKQZ4B","created_at":"2026-06-29T01:15:07Z"},{"alias_kind":"pith_short_8","alias_value":"C4JEF5LK","created_at":"2026-06-29T01:15:07Z"}],"graph_snapshots":[{"event_id":"sha256:6c84c5298c3c33b738d0b5ba7347dc0d80647de8a3c1e228c64f47a4ee84d7f5","target":"graph","created_at":"2026-06-29T01:15:07Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2606.28166/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Reinforcement learning with verifiable rewards (RLVR) has significantly improved the reasoning capability of large language models, reaching expert or even superhuman performance in domains such as competition math. However, whether weaker agents and humans can actually harness this capability is far less certain, with RLVR documented to drift reasoning toward idiosyncratic patterns such as poor readability and language mixing. Tandem training is a recently introduced paradigm that targets this compatibility problem: a trained, stronger senior co-generates each rollout with a frozen, weaker ju","authors_text":"Ashton Anderson, Difan Jiao, Raghav Singhal, Robert West","cross_cats":[],"headline":"","license":"http://creativecommons.org/licenses/by-sa/4.0/","primary_cat":"cs.AI","submitted_at":"2026-06-26T15:00:35Z","title":"Tandem Reinforcement Learning with Verifiable Rewards"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.28166","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:fd5cd59440a68cc353da4793d7923a8d50b30afad6f1aa12061f1d6e978b2f4f","target":"record","created_at":"2026-06-29T01:15:07Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"2c3125c50cdf4ddadbb80bcde49db9af80f16a1da711da0a3bb3b282b7c6e9b3","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by-sa/4.0/","primary_cat":"cs.AI","submitted_at":"2026-06-26T15:00:35Z","title_canon_sha256":"bb029d7c64cd8103778c36e68184be1ad6d6dca4452fd6b84c820895359c86b9"},"schema_version":"1.0","source":{"id":"2606.28166","kind":"arxiv","version":1}},"canonical_sha256":"171242f56a28c0a8678116e0a8d2d70ba2c193c6324d41b1e424b2218fd71c2e","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"171242f56a28c0a8678116e0a8d2d70ba2c193c6324d41b1e424b2218fd71c2e","first_computed_at":"2026-06-29T01:15:07.960781Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-06-29T01:15:07.960781Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"pe/NZIK3uOHR+gjoBwkfrGmIn/p2n7vuTeWBt8h+7OMlF/1qJ+DDZATAML2WbcML34FqMqXtY++YqeQf7EAuCA==","signature_status":"signed_v1","signed_at":"2026-06-29T01:15:07.961223Z","signed_message":"canonical_sha256_bytes"},"source_id":"2606.28166","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:fd5cd59440a68cc353da4793d7923a8d50b30afad6f1aa12061f1d6e978b2f4f","sha256:6c84c5298c3c33b738d0b5ba7347dc0d80647de8a3c1e228c64f47a4ee84d7f5"],"state_sha256":"96cdb03eb15980371a9713f4f6694d0dd06d3609f25e647c5bd17409f5c615b2"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"EIM6IPoekiSIUn3nBemA/3d36gesr6ZwXY3RzYm7QGIZELQ5if+agogOMH0+iuDVsQD8BhB2r4M733E2oxb+Cw==","signed_message":"bundle_sha256_bytes","signed_at":"2026-07-02T01:31:20.136006Z","bundle_sha256":"392f0a4ae557413bdf62ef2086219d3c9663c76762a36351359d411249f0e2e8"}}