{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2025:DOUNEVZ4WPFSTK7C5IYVXPP2LS","short_pith_number":"pith:DOUNEVZ4","schema_version":"1.0","canonical_sha256":"1ba8d2573cb3cb29abe2ea315bbdfa5c9dd669d1fa584fd77bc6c7eedad30de0","source":{"kind":"arxiv","id":"2507.15778","version":2},"attestation_state":"computed","paper":{"title":"Stabilizing Knowledge, Promoting Reasoning: Dual-Token Constraints for RLVR","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Fuzheng Zhang, Guorui Zhou, Jiakang Wang, Ling Pan, Runze Liu, Xiu Li","submitted_at":"2025-07-21T16:34:01Z","abstract_excerpt":"Reinforcement Learning with Verifiable Rewards (RLVR) has become an effective post-training method for improving the reasoning abilities of Large Language Models (LLMs). However, existing methods mainly apply uniform optimization constraints across all tokens, ignoring their heterogeneous roles. Prior work shows that high-entropy tokens are closely tied to reasoning, while low-entropy tokens primarily encode factual knowledge, and recent approaches attempt to exploit this distinction by isolating token updates via masking or asynchronous training. We argue that such isolation breaks the sequen"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2507.15778","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2025-07-21T16:34:01Z","cross_cats_sorted":[],"title_canon_sha256":"d7f7b081b9a47a2da71f5c5ccbc4c867bcf25a686a2ca5527b956f3bb0efec16","abstract_canon_sha256":"a04bd231108d166830be735f17c1d26f3ac349f7e553b2d478957e3449a2e952"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-20T00:00:22.214918Z","signature_b64":"S+FqJsMOiemSbAYmZYvErkjoya6lr+aj99CXLT2qnMVUPIUk8H4z6VA1vbtCim6CtxheRfMMZDBHwwffnBV3DQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"1ba8d2573cb3cb29abe2ea315bbdfa5c9dd669d1fa584fd77bc6c7eedad30de0","last_reissued_at":"2026-05-20T00:00:22.214132Z","signature_status":"signed_v1","first_computed_at":"2026-05-20T00:00:22.214132Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Stabilizing Knowledge, Promoting Reasoning: Dual-Token Constraints for RLVR","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Fuzheng Zhang, Guorui Zhou, Jiakang Wang, Ling Pan, Runze Liu, Xiu Li","submitted_at":"2025-07-21T16:34:01Z","abstract_excerpt":"Reinforcement Learning with Verifiable Rewards (RLVR) has become an effective post-training method for improving the reasoning abilities of Large Language Models (LLMs). However, existing methods mainly apply uniform optimization constraints across all tokens, ignoring their heterogeneous roles. Prior work shows that high-entropy tokens are closely tied to reasoning, while low-entropy tokens primarily encode factual knowledge, and recent approaches attempt to exploit this distinction by isolating token updates via masking or asynchronous training. We argue that such isolation breaks the sequen"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2507.15778","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2507.15778/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2507.15778","created_at":"2026-05-20T00:00:22.214263+00:00"},{"alias_kind":"arxiv_version","alias_value":"2507.15778v2","created_at":"2026-05-20T00:00:22.214263+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2507.15778","created_at":"2026-05-20T00:00:22.214263+00:00"},{"alias_kind":"pith_short_12","alias_value":"DOUNEVZ4WPFS","created_at":"2026-05-20T00:00:22.214263+00:00"},{"alias_kind":"pith_short_16","alias_value":"DOUNEVZ4WPFSTK7C","created_at":"2026-05-20T00:00:22.214263+00:00"},{"alias_kind":"pith_short_8","alias_value":"DOUNEVZ4","created_at":"2026-05-20T00:00:22.214263+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":2,"internal_anchor_count":2,"sample":[{"citing_arxiv_id":"2602.09782","citing_title":"Flexible Entropy Control in RLVR with a Gradient-Preserving Perspective","ref_index":18,"is_internal_anchor":true},{"citing_arxiv_id":"2605.11491","citing_title":"Understanding and Preventing Entropy Collapse in RLVR with On-Policy Entropy Flow Optimization","ref_index":37,"is_internal_anchor":true}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/DOUNEVZ4WPFSTK7C5IYVXPP2LS","json":"https://pith.science/pith/DOUNEVZ4WPFSTK7C5IYVXPP2LS.json","graph_json":"https://pith.science/api/pith-number/DOUNEVZ4WPFSTK7C5IYVXPP2LS/graph.json","events_json":"https://pith.science/api/pith-number/DOUNEVZ4WPFSTK7C5IYVXPP2LS/events.json","paper":"https://pith.science/paper/DOUNEVZ4"},"agent_actions":{"view_html":"https://pith.science/pith/DOUNEVZ4WPFSTK7C5IYVXPP2LS","download_json":"https://pith.science/pith/DOUNEVZ4WPFSTK7C5IYVXPP2LS.json","view_paper":"https://pith.science/paper/DOUNEVZ4","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2507.15778&json=true","fetch_graph":"https://pith.science/api/pith-number/DOUNEVZ4WPFSTK7C5IYVXPP2LS/graph.json","fetch_events":"https://pith.science/api/pith-number/DOUNEVZ4WPFSTK7C5IYVXPP2LS/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/DOUNEVZ4WPFSTK7C5IYVXPP2LS/action/timestamp_anchor","attest_storage":"https://pith.science/pith/DOUNEVZ4WPFSTK7C5IYVXPP2LS/action/storage_attestation","attest_author":"https://pith.science/pith/DOUNEVZ4WPFSTK7C5IYVXPP2LS/action/author_attestation","sign_citation":"https://pith.science/pith/DOUNEVZ4WPFSTK7C5IYVXPP2LS/action/citation_signature","submit_replication":"https://pith.science/pith/DOUNEVZ4WPFSTK7C5IYVXPP2LS/action/replication_record"}},"created_at":"2026-05-20T00:00:22.214263+00:00","updated_at":"2026-05-20T00:00:22.214263+00:00"}