{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:25R6PVE3JLGJE5SWYI5BPZ2AGS","short_pith_number":"pith:25R6PVE3","canonical_record":{"source":{"id":"2606.30420","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2026-06-29T15:05:28Z","cross_cats_sorted":[],"title_canon_sha256":"d75d546f5dab66fadd01a6a7e467930906cd8739dbece8099b8c8ef344ad7a6c","abstract_canon_sha256":"daa55345d869c765d413d25efee80dc8dbf1818d9abc44adfc1d17a9b99f74e1"},"schema_version":"1.0"},"canonical_sha256":"d763e7d49b4acc927656c23a17e74034b6a896616c017f1089b0460ccdc56eb5","source":{"kind":"arxiv","id":"2606.30420","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2606.30420","created_at":"2026-06-30T02:18:14Z"},{"alias_kind":"arxiv_version","alias_value":"2606.30420v1","created_at":"2026-06-30T02:18:14Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.30420","created_at":"2026-06-30T02:18:14Z"},{"alias_kind":"pith_short_12","alias_value":"25R6PVE3JLGJ","created_at":"2026-06-30T02:18:14Z"},{"alias_kind":"pith_short_16","alias_value":"25R6PVE3JLGJE5SW","created_at":"2026-06-30T02:18:14Z"},{"alias_kind":"pith_short_8","alias_value":"25R6PVE3","created_at":"2026-06-30T02:18:14Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:25R6PVE3JLGJE5SWYI5BPZ2AGS","target":"record","payload":{"canonical_record":{"source":{"id":"2606.30420","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2026-06-29T15:05:28Z","cross_cats_sorted":[],"title_canon_sha256":"d75d546f5dab66fadd01a6a7e467930906cd8739dbece8099b8c8ef344ad7a6c","abstract_canon_sha256":"daa55345d869c765d413d25efee80dc8dbf1818d9abc44adfc1d17a9b99f74e1"},"schema_version":"1.0"},"canonical_sha256":"d763e7d49b4acc927656c23a17e74034b6a896616c017f1089b0460ccdc56eb5","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-30T02:18:14.607977Z","signature_b64":"J5XA4tHrRPSKm4lZPnDtXoPjSpAaiZydGIWEtUMohG1DhyF65EkbInjrLGqW0J9EU53Ra2cB2P2YtKFvZnr3Cw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"d763e7d49b4acc927656c23a17e74034b6a896616c017f1089b0460ccdc56eb5","last_reissued_at":"2026-06-30T02:18:14.607472Z","signature_status":"signed_v1","first_computed_at":"2026-06-30T02:18:14.607472Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2606.30420","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-30T02:18:14Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"JGpNnW9UeBsgXF+MpvASD+2QP2NZy8P6Oyk8gzMMXCPlYCah8s9rHBvYdAxYs49wqqDxZwZSFEaGY4KzsM09Dw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-07-01T08:19:38.485680Z"},"content_sha256":"08705e70f32232cdb5fdae414f79032d2820f9c1e802dabdec73bdc115184067","schema_version":"1.0","event_id":"sha256:08705e70f32232cdb5fdae414f79032d2820f9c1e802dabdec73bdc115184067"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:25R6PVE3JLGJE5SWYI5BPZ2AGS","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Experience Augmented Policy Optimization for LLM Reasoning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.LG","authors_text":"Chiyu Ma, Guoyin Wang, Jinda Lu, Jinghan Li, Jingren Zhou, Junkang Wu, Kexin Huang, Shaohang Wei, Shuo Yang, Xiang Wang","submitted_at":"2026-06-29T15:05:28Z","abstract_excerpt":"Reinforcement Learning with Verifiable Rewards (RLVR) is a powerful paradigm for improving the reasoning capabilities of large language models (LLMs). However, existing RLVR methods typically rely on on-policy optimization from scratch, resulting in high sampling costs and inefficient utilization of accumulated experience. As model capabilities and policy behaviors evolve during training, recent attempts to reuse experience via fixed reasoning trajectories further suffer from policy mismatch. Motivated by these limitations, we argue that experience in RLVR should not be reused as fixed reasoni"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.30420","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2606.30420/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-30T02:18:14Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"JBYL4y608c38mRliAZfTZbp1ghmyNBNWgTgcwI7yCiRmeF9KIcO5ASbajt8F8pfoBsKpyyZwS1GyScjtSiy4BA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-07-01T08:19:38.486296Z"},"content_sha256":"5cc987451f32445fd54031e2850772852472e07582311908cd6d29ae2fbd703e","schema_version":"1.0","event_id":"sha256:5cc987451f32445fd54031e2850772852472e07582311908cd6d29ae2fbd703e"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/25R6PVE3JLGJE5SWYI5BPZ2AGS/bundle.json","state_url":"https://pith.science/pith/25R6PVE3JLGJE5SWYI5BPZ2AGS/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/25R6PVE3JLGJE5SWYI5BPZ2AGS/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-07-01T08:19:38Z","links":{"resolver":"https://pith.science/pith/25R6PVE3JLGJE5SWYI5BPZ2AGS","bundle":"https://pith.science/pith/25R6PVE3JLGJE5SWYI5BPZ2AGS/bundle.json","state":"https://pith.science/pith/25R6PVE3JLGJE5SWYI5BPZ2AGS/state.json","well_known_bundle":"https://pith.science/.well-known/pith/25R6PVE3JLGJE5SWYI5BPZ2AGS/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:25R6PVE3JLGJE5SWYI5BPZ2AGS","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"daa55345d869c765d413d25efee80dc8dbf1818d9abc44adfc1d17a9b99f74e1","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2026-06-29T15:05:28Z","title_canon_sha256":"d75d546f5dab66fadd01a6a7e467930906cd8739dbece8099b8c8ef344ad7a6c"},"schema_version":"1.0","source":{"id":"2606.30420","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2606.30420","created_at":"2026-06-30T02:18:14Z"},{"alias_kind":"arxiv_version","alias_value":"2606.30420v1","created_at":"2026-06-30T02:18:14Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.30420","created_at":"2026-06-30T02:18:14Z"},{"alias_kind":"pith_short_12","alias_value":"25R6PVE3JLGJ","created_at":"2026-06-30T02:18:14Z"},{"alias_kind":"pith_short_16","alias_value":"25R6PVE3JLGJE5SW","created_at":"2026-06-30T02:18:14Z"},{"alias_kind":"pith_short_8","alias_value":"25R6PVE3","created_at":"2026-06-30T02:18:14Z"}],"graph_snapshots":[{"event_id":"sha256:5cc987451f32445fd54031e2850772852472e07582311908cd6d29ae2fbd703e","target":"graph","created_at":"2026-06-30T02:18:14Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2606.30420/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Reinforcement Learning with Verifiable Rewards (RLVR) is a powerful paradigm for improving the reasoning capabilities of large language models (LLMs). However, existing RLVR methods typically rely on on-policy optimization from scratch, resulting in high sampling costs and inefficient utilization of accumulated experience. As model capabilities and policy behaviors evolve during training, recent attempts to reuse experience via fixed reasoning trajectories further suffer from policy mismatch. Motivated by these limitations, we argue that experience in RLVR should not be reused as fixed reasoni","authors_text":"Chiyu Ma, Guoyin Wang, Jinda Lu, Jinghan Li, Jingren Zhou, Junkang Wu, Kexin Huang, Shaohang Wei, Shuo Yang, Xiang Wang","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2026-06-29T15:05:28Z","title":"Experience Augmented Policy Optimization for LLM Reasoning"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.30420","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:08705e70f32232cdb5fdae414f79032d2820f9c1e802dabdec73bdc115184067","target":"record","created_at":"2026-06-30T02:18:14Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"daa55345d869c765d413d25efee80dc8dbf1818d9abc44adfc1d17a9b99f74e1","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2026-06-29T15:05:28Z","title_canon_sha256":"d75d546f5dab66fadd01a6a7e467930906cd8739dbece8099b8c8ef344ad7a6c"},"schema_version":"1.0","source":{"id":"2606.30420","kind":"arxiv","version":1}},"canonical_sha256":"d763e7d49b4acc927656c23a17e74034b6a896616c017f1089b0460ccdc56eb5","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"d763e7d49b4acc927656c23a17e74034b6a896616c017f1089b0460ccdc56eb5","first_computed_at":"2026-06-30T02:18:14.607472Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-06-30T02:18:14.607472Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"J5XA4tHrRPSKm4lZPnDtXoPjSpAaiZydGIWEtUMohG1DhyF65EkbInjrLGqW0J9EU53Ra2cB2P2YtKFvZnr3Cw==","signature_status":"signed_v1","signed_at":"2026-06-30T02:18:14.607977Z","signed_message":"canonical_sha256_bytes"},"source_id":"2606.30420","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:08705e70f32232cdb5fdae414f79032d2820f9c1e802dabdec73bdc115184067","sha256:5cc987451f32445fd54031e2850772852472e07582311908cd6d29ae2fbd703e"],"state_sha256":"438953dff72a68cd2d0311e3e357bfbebc9a6ec951895dcc0a0417b7703684d4"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"FmHOrNWHXgGCWU7L1Cl3dSOrTDqhN5pI9e9As3xhPSsgAq5CBxJcmk3L3i9IIEDzzrFZ8lVw4Zppq8q9vvh+DQ==","signed_message":"bundle_sha256_bytes","signed_at":"2026-07-01T08:19:38.489534Z","bundle_sha256":"8ae846032a6cebf7562a713912a1f019b54eb05e52bdce4e58416ade1d60b08a"}}