{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:DGRLDIVIBLMI5RH3YGUMYNTWT3","short_pith_number":"pith:DGRLDIVI","canonical_record":{"source":{"id":"2606.22570","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2026-06-21T16:14:46Z","cross_cats_sorted":[],"title_canon_sha256":"9b991203f5ae27d05b85a9394d5b10f11db0ec285f670f4fe2470fcc09597ab1","abstract_canon_sha256":"8b857f21bf0f8e3b4be87256b2e34409bc58d079ff26a229aeb0150bd3125071"},"schema_version":"1.0"},"canonical_sha256":"19a2b1a2a80ad88ec4fbc1a8cc36769ee8eebddd5a90061cc9cd0483cfbcf87f","source":{"kind":"arxiv","id":"2606.22570","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2606.22570","created_at":"2026-06-23T02:13:41Z"},{"alias_kind":"arxiv_version","alias_value":"2606.22570v1","created_at":"2026-06-23T02:13:41Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.22570","created_at":"2026-06-23T02:13:41Z"},{"alias_kind":"pith_short_12","alias_value":"DGRLDIVIBLMI","created_at":"2026-06-23T02:13:41Z"},{"alias_kind":"pith_short_16","alias_value":"DGRLDIVIBLMI5RH3","created_at":"2026-06-23T02:13:41Z"},{"alias_kind":"pith_short_8","alias_value":"DGRLDIVI","created_at":"2026-06-23T02:13:41Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:DGRLDIVIBLMI5RH3YGUMYNTWT3","target":"record","payload":{"canonical_record":{"source":{"id":"2606.22570","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2026-06-21T16:14:46Z","cross_cats_sorted":[],"title_canon_sha256":"9b991203f5ae27d05b85a9394d5b10f11db0ec285f670f4fe2470fcc09597ab1","abstract_canon_sha256":"8b857f21bf0f8e3b4be87256b2e34409bc58d079ff26a229aeb0150bd3125071"},"schema_version":"1.0"},"canonical_sha256":"19a2b1a2a80ad88ec4fbc1a8cc36769ee8eebddd5a90061cc9cd0483cfbcf87f","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-23T02:13:41.888772Z","signature_b64":"ZkuiMfLTApsXZQ+zygVMSVG2Ef7/RMdOXXJMD6gk/Cs3C8cyk6fdUSd5lW5R6cl2rD62NHW8aofXC79f31NMBQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"19a2b1a2a80ad88ec4fbc1a8cc36769ee8eebddd5a90061cc9cd0483cfbcf87f","last_reissued_at":"2026-06-23T02:13:41.888329Z","signature_status":"signed_v1","first_computed_at":"2026-06-23T02:13:41.888329Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2606.22570","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-23T02:13:41Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"nr8GTpCDhp7W61bPiRj2CJznxvalN7Ul66nZNrIcP5t+Aaq9z/2thiIgOiTN9meIHYwjmfsCYTExnosHbDNXAQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-07-04T20:24:34.479587Z"},"content_sha256":"d330d290adea66eed303a6ba26a05863f7847e4dd4afdb10b4b8f201ae0d69f5","schema_version":"1.0","event_id":"sha256:d330d290adea66eed303a6ba26a05863f7847e4dd4afdb10b4b8f201ae0d69f5"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:DGRLDIVIBLMI5RH3YGUMYNTWT3","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"What are Key Factors for Updates in RL for LLM Reasoning?","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Demi Wang, Dongsheng Li, Jiahang Xu, Peidong Wang, Shi Feng, Xiaocui Yang, Xufang Luo, Yuqing Yang","submitted_at":"2026-06-21T16:14:46Z","abstract_excerpt":"Reinforcement Learning from Verifiable Rewards (RLVR) has emerged as a promising framework for enhancing the reasoning ability of large language models. However, much of the existing work is guided by heuristic intuition, leading to divergent algorithmic choices, even contradictory ones that nevertheless report empirical gains. To better understand this phenomenon, we conduct a theoretical analysis of RLVR updates. Our study reveals that differences in off-policy degree, determined by the number of gradient steps per rollout, substantially affect the distribution of importance sampling ratios "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.22570","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2606.22570/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-23T02:13:41Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"PwyPprFzTCzqv3mpLqRDZWkJpcEMn/7Q5rwQ8CraFJum8gzst6jXUoLdrrhayOHTZuQVkjXOXdFfhDpu/eh5Dw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-07-04T20:24:34.479987Z"},"content_sha256":"c92dba84cc350ab844189fb13cb87653743124c6be734af3c2f9fb436004f89d","schema_version":"1.0","event_id":"sha256:c92dba84cc350ab844189fb13cb87653743124c6be734af3c2f9fb436004f89d"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/DGRLDIVIBLMI5RH3YGUMYNTWT3/bundle.json","state_url":"https://pith.science/pith/DGRLDIVIBLMI5RH3YGUMYNTWT3/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/DGRLDIVIBLMI5RH3YGUMYNTWT3/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-07-04T20:24:34Z","links":{"resolver":"https://pith.science/pith/DGRLDIVIBLMI5RH3YGUMYNTWT3","bundle":"https://pith.science/pith/DGRLDIVIBLMI5RH3YGUMYNTWT3/bundle.json","state":"https://pith.science/pith/DGRLDIVIBLMI5RH3YGUMYNTWT3/state.json","well_known_bundle":"https://pith.science/.well-known/pith/DGRLDIVIBLMI5RH3YGUMYNTWT3/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:DGRLDIVIBLMI5RH3YGUMYNTWT3","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"8b857f21bf0f8e3b4be87256b2e34409bc58d079ff26a229aeb0150bd3125071","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2026-06-21T16:14:46Z","title_canon_sha256":"9b991203f5ae27d05b85a9394d5b10f11db0ec285f670f4fe2470fcc09597ab1"},"schema_version":"1.0","source":{"id":"2606.22570","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2606.22570","created_at":"2026-06-23T02:13:41Z"},{"alias_kind":"arxiv_version","alias_value":"2606.22570v1","created_at":"2026-06-23T02:13:41Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.22570","created_at":"2026-06-23T02:13:41Z"},{"alias_kind":"pith_short_12","alias_value":"DGRLDIVIBLMI","created_at":"2026-06-23T02:13:41Z"},{"alias_kind":"pith_short_16","alias_value":"DGRLDIVIBLMI5RH3","created_at":"2026-06-23T02:13:41Z"},{"alias_kind":"pith_short_8","alias_value":"DGRLDIVI","created_at":"2026-06-23T02:13:41Z"}],"graph_snapshots":[{"event_id":"sha256:c92dba84cc350ab844189fb13cb87653743124c6be734af3c2f9fb436004f89d","target":"graph","created_at":"2026-06-23T02:13:41Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2606.22570/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Reinforcement Learning from Verifiable Rewards (RLVR) has emerged as a promising framework for enhancing the reasoning ability of large language models. However, much of the existing work is guided by heuristic intuition, leading to divergent algorithmic choices, even contradictory ones that nevertheless report empirical gains. To better understand this phenomenon, we conduct a theoretical analysis of RLVR updates. Our study reveals that differences in off-policy degree, determined by the number of gradient steps per rollout, substantially affect the distribution of importance sampling ratios ","authors_text":"Demi Wang, Dongsheng Li, Jiahang Xu, Peidong Wang, Shi Feng, Xiaocui Yang, Xufang Luo, Yuqing Yang","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2026-06-21T16:14:46Z","title":"What are Key Factors for Updates in RL for LLM Reasoning?"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.22570","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:d330d290adea66eed303a6ba26a05863f7847e4dd4afdb10b4b8f201ae0d69f5","target":"record","created_at":"2026-06-23T02:13:41Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"8b857f21bf0f8e3b4be87256b2e34409bc58d079ff26a229aeb0150bd3125071","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2026-06-21T16:14:46Z","title_canon_sha256":"9b991203f5ae27d05b85a9394d5b10f11db0ec285f670f4fe2470fcc09597ab1"},"schema_version":"1.0","source":{"id":"2606.22570","kind":"arxiv","version":1}},"canonical_sha256":"19a2b1a2a80ad88ec4fbc1a8cc36769ee8eebddd5a90061cc9cd0483cfbcf87f","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"19a2b1a2a80ad88ec4fbc1a8cc36769ee8eebddd5a90061cc9cd0483cfbcf87f","first_computed_at":"2026-06-23T02:13:41.888329Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-06-23T02:13:41.888329Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"ZkuiMfLTApsXZQ+zygVMSVG2Ef7/RMdOXXJMD6gk/Cs3C8cyk6fdUSd5lW5R6cl2rD62NHW8aofXC79f31NMBQ==","signature_status":"signed_v1","signed_at":"2026-06-23T02:13:41.888772Z","signed_message":"canonical_sha256_bytes"},"source_id":"2606.22570","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:d330d290adea66eed303a6ba26a05863f7847e4dd4afdb10b4b8f201ae0d69f5","sha256:c92dba84cc350ab844189fb13cb87653743124c6be734af3c2f9fb436004f89d"],"state_sha256":"9bc487d688403eeda02080ce182c82588807e9aa5bbd3ce87ea922d925746d88"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"T2IboXhm4SQ6XixdeYpTA6QeezsBOflxXmgXtSTkuawHKNkVcaD65laYdXDlAuSMG0wpgg17wlaYTkTFF5VmDg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-07-04T20:24:34.482058Z","bundle_sha256":"70caa1145cfdbcc18d2561a1dc3f78d639b3fd74796897946bf60f7aaab65591"}}