{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:NVTOTHPGQXFJ3LQ2Q5FQEWRHNG","short_pith_number":"pith:NVTOTHPG","canonical_record":{"source":{"id":"2606.08815","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.AI","submitted_at":"2026-06-07T20:08:36Z","cross_cats_sorted":["cs.CL","cs.LG"],"title_canon_sha256":"abea1b34652b1561c5ba7a46451dd3e15b74304310637e622047b3eb8f8d56b1","abstract_canon_sha256":"f4c0d750efd69549ad9d9b8bc5a55d5935c549cc7abbd95946b5caa15989227c"},"schema_version":"1.0"},"canonical_sha256":"6d66e99de685ca9dae1a874b025a2769be9f143215f86ce323cead81448640f6","source":{"kind":"arxiv","id":"2606.08815","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2606.08815","created_at":"2026-06-09T02:07:40Z"},{"alias_kind":"arxiv_version","alias_value":"2606.08815v1","created_at":"2026-06-09T02:07:40Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.08815","created_at":"2026-06-09T02:07:40Z"},{"alias_kind":"pith_short_12","alias_value":"NVTOTHPGQXFJ","created_at":"2026-06-09T02:07:40Z"},{"alias_kind":"pith_short_16","alias_value":"NVTOTHPGQXFJ3LQ2","created_at":"2026-06-09T02:07:40Z"},{"alias_kind":"pith_short_8","alias_value":"NVTOTHPG","created_at":"2026-06-09T02:07:40Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:NVTOTHPGQXFJ3LQ2Q5FQEWRHNG","target":"record","payload":{"canonical_record":{"source":{"id":"2606.08815","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.AI","submitted_at":"2026-06-07T20:08:36Z","cross_cats_sorted":["cs.CL","cs.LG"],"title_canon_sha256":"abea1b34652b1561c5ba7a46451dd3e15b74304310637e622047b3eb8f8d56b1","abstract_canon_sha256":"f4c0d750efd69549ad9d9b8bc5a55d5935c549cc7abbd95946b5caa15989227c"},"schema_version":"1.0"},"canonical_sha256":"6d66e99de685ca9dae1a874b025a2769be9f143215f86ce323cead81448640f6","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-09T02:07:40.987054Z","signature_b64":"5k1ith1cYSJjgooNiKCm1cAZFocDaJ6USZqEe8wZ3ZvCOtmh0GEFdb9trXqEwLFuTBI/hr6hDRpFoIlI/Cl4CQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"6d66e99de685ca9dae1a874b025a2769be9f143215f86ce323cead81448640f6","last_reissued_at":"2026-06-09T02:07:40.986240Z","signature_status":"signed_v1","first_computed_at":"2026-06-09T02:07:40.986240Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2606.08815","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-09T02:07:40Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"llVa6PDQmTlU8dnVbqKSg07FCEiG4D/cNXUxFTy0Na1SlrMgzXV0EprlltM0SPiO3DaibVLj1wABQrVcF3n3Cg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-11T16:56:38.922145Z"},"content_sha256":"7d9e521fbacdc04df96218ab91f71ec63177b0d2390d16bb797145b2ac93c343","schema_version":"1.0","event_id":"sha256:7d9e521fbacdc04df96218ab91f71ec63177b0d2390d16bb797145b2ac93c343"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:NVTOTHPGQXFJ3LQ2Q5FQEWRHNG","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Momentum for Reasoning: Dense Intrinsic Signals in Policy Optimization","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.CL","cs.LG"],"primary_cat":"cs.AI","authors_text":"Haobo Wang, Hao Chen, Junbo Zhao, Liyao Li, Qi Zhang, Ru Peng, Xiaomeng Hu, Xiaoyu Shen, Xuhang Zhu, Yanyu Chen, Zhanming Shen","submitted_at":"2026-06-07T20:08:36Z","abstract_excerpt":"Reinforcement learning with verifiable rewards (RLVR) has emerged as a powerful paradigm for eliciting long-chain reasoning in large language models. However, existing methods based on Group Relative Policy Optimization (GRPO) rely on a binary outcome reward, which induces two structural failure modes: Zero-Advantage Collapse, in which all rollouts in a group share the same outcome and the gradient vanishes, and Hallucinated Certainty, in which the model becomes increasingly confident on incorrect rollouts late in training. We address both modes by densifying the reward with intrinsic signals "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.08815","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2606.08815/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-09T02:07:40Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"UrXZgi7qSgvkaA3JrpznTjX9RKit77nmGyyi0OHrkGvKICQgmpcsEdOs38zUHTPQRC38uStevjgrghvDcfoHDA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-11T16:56:38.922919Z"},"content_sha256":"306806d728865586ecb7ea6323c9cfab3be0d807d6c3e26a8e83a387090954fd","schema_version":"1.0","event_id":"sha256:306806d728865586ecb7ea6323c9cfab3be0d807d6c3e26a8e83a387090954fd"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/NVTOTHPGQXFJ3LQ2Q5FQEWRHNG/bundle.json","state_url":"https://pith.science/pith/NVTOTHPGQXFJ3LQ2Q5FQEWRHNG/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/NVTOTHPGQXFJ3LQ2Q5FQEWRHNG/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-11T16:56:38Z","links":{"resolver":"https://pith.science/pith/NVTOTHPGQXFJ3LQ2Q5FQEWRHNG","bundle":"https://pith.science/pith/NVTOTHPGQXFJ3LQ2Q5FQEWRHNG/bundle.json","state":"https://pith.science/pith/NVTOTHPGQXFJ3LQ2Q5FQEWRHNG/state.json","well_known_bundle":"https://pith.science/.well-known/pith/NVTOTHPGQXFJ3LQ2Q5FQEWRHNG/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:NVTOTHPGQXFJ3LQ2Q5FQEWRHNG","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"f4c0d750efd69549ad9d9b8bc5a55d5935c549cc7abbd95946b5caa15989227c","cross_cats_sorted":["cs.CL","cs.LG"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.AI","submitted_at":"2026-06-07T20:08:36Z","title_canon_sha256":"abea1b34652b1561c5ba7a46451dd3e15b74304310637e622047b3eb8f8d56b1"},"schema_version":"1.0","source":{"id":"2606.08815","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2606.08815","created_at":"2026-06-09T02:07:40Z"},{"alias_kind":"arxiv_version","alias_value":"2606.08815v1","created_at":"2026-06-09T02:07:40Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.08815","created_at":"2026-06-09T02:07:40Z"},{"alias_kind":"pith_short_12","alias_value":"NVTOTHPGQXFJ","created_at":"2026-06-09T02:07:40Z"},{"alias_kind":"pith_short_16","alias_value":"NVTOTHPGQXFJ3LQ2","created_at":"2026-06-09T02:07:40Z"},{"alias_kind":"pith_short_8","alias_value":"NVTOTHPG","created_at":"2026-06-09T02:07:40Z"}],"graph_snapshots":[{"event_id":"sha256:306806d728865586ecb7ea6323c9cfab3be0d807d6c3e26a8e83a387090954fd","target":"graph","created_at":"2026-06-09T02:07:40Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2606.08815/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Reinforcement learning with verifiable rewards (RLVR) has emerged as a powerful paradigm for eliciting long-chain reasoning in large language models. However, existing methods based on Group Relative Policy Optimization (GRPO) rely on a binary outcome reward, which induces two structural failure modes: Zero-Advantage Collapse, in which all rollouts in a group share the same outcome and the gradient vanishes, and Hallucinated Certainty, in which the model becomes increasingly confident on incorrect rollouts late in training. We address both modes by densifying the reward with intrinsic signals ","authors_text":"Haobo Wang, Hao Chen, Junbo Zhao, Liyao Li, Qi Zhang, Ru Peng, Xiaomeng Hu, Xiaoyu Shen, Xuhang Zhu, Yanyu Chen, Zhanming Shen","cross_cats":["cs.CL","cs.LG"],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.AI","submitted_at":"2026-06-07T20:08:36Z","title":"Momentum for Reasoning: Dense Intrinsic Signals in Policy Optimization"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.08815","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:7d9e521fbacdc04df96218ab91f71ec63177b0d2390d16bb797145b2ac93c343","target":"record","created_at":"2026-06-09T02:07:40Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"f4c0d750efd69549ad9d9b8bc5a55d5935c549cc7abbd95946b5caa15989227c","cross_cats_sorted":["cs.CL","cs.LG"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.AI","submitted_at":"2026-06-07T20:08:36Z","title_canon_sha256":"abea1b34652b1561c5ba7a46451dd3e15b74304310637e622047b3eb8f8d56b1"},"schema_version":"1.0","source":{"id":"2606.08815","kind":"arxiv","version":1}},"canonical_sha256":"6d66e99de685ca9dae1a874b025a2769be9f143215f86ce323cead81448640f6","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"6d66e99de685ca9dae1a874b025a2769be9f143215f86ce323cead81448640f6","first_computed_at":"2026-06-09T02:07:40.986240Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-06-09T02:07:40.986240Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"5k1ith1cYSJjgooNiKCm1cAZFocDaJ6USZqEe8wZ3ZvCOtmh0GEFdb9trXqEwLFuTBI/hr6hDRpFoIlI/Cl4CQ==","signature_status":"signed_v1","signed_at":"2026-06-09T02:07:40.987054Z","signed_message":"canonical_sha256_bytes"},"source_id":"2606.08815","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:7d9e521fbacdc04df96218ab91f71ec63177b0d2390d16bb797145b2ac93c343","sha256:306806d728865586ecb7ea6323c9cfab3be0d807d6c3e26a8e83a387090954fd"],"state_sha256":"6eecd50d896e751ad0537cd500a143759ec6b60cb94746dc873eb58afacb8ad5"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"/uUKIRILilCyE0mdJLoS2FM3+8oJUqEIunLoLMX32OPT5DfydxU4tcjEHgfMQWQDEEmLHPZ7uUucgJgt2GjfAw==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-11T16:56:38.927017Z","bundle_sha256":"470edf5e1ba1613a5a3520bdc013960264a175b9768f0a9b29ba52b27552608d"}}