{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:RXWBANRUYDCFJBQKAIHRURPMW4","short_pith_number":"pith:RXWBANRU","schema_version":"1.0","canonical_sha256":"8dec103634c0c454860a020f1a45ecb717da0b181cbc36172041ec18dd7759e6","source":{"kind":"arxiv","id":"2606.19771","version":1},"attestation_state":"computed","paper":{"title":"Beyond Entropy: Learning from Token-Level Distributional Deviations for LLM Reasoning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.AI","authors_text":"Bing Guo, Haoxi Li, Jie Zhang, Jingcai Guo, Song Guo, Xuanzhi Feng, Yuming Jiang, Zeyu Liu, Zhengyang Li","submitted_at":"2026-06-18T04:11:56Z","abstract_excerpt":"Reinforcement Learning with Verifiable Rewards (RLVR) has significantly advanced Large Language Model (LLM) reasoning; however, it faces a fundamental optimization instability: uniform token updates precipitate entropy collapse, leading to premature convergence to suboptimal strategies, whereas excessive Shannon Entropy maximization can cause entropy explosion, driving blind exploration toward incoherent reasoning chains. To resolve this dichotomy, we introduce the Independent Combinatorial Tokens (ICT) framework, which shifts the optimization focus from scalar uncertainty to the distributiona"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2606.19771","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2026-06-18T04:11:56Z","cross_cats_sorted":[],"title_canon_sha256":"5c3a98685186f2864111ffc3b261243dbb00a2cb9fd5d522d4047f8aa3bd0462","abstract_canon_sha256":"2252d030f648d804bf9ae3e4d20fd874bc35b09f296f76dfe4330a3f47b2968f"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-19T16:12:34.732194Z","signature_b64":"LYhtWoIKoM9ebQV91cvcjF8IXNbg8L/kHYJeJ/8j3ERR/gpL5w74YnGGZJk4QWvsZqdLVIkjFxrHD4C8CD0nCA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"8dec103634c0c454860a020f1a45ecb717da0b181cbc36172041ec18dd7759e6","last_reissued_at":"2026-06-19T16:12:34.731808Z","signature_status":"signed_v1","first_computed_at":"2026-06-19T16:12:34.731808Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Beyond Entropy: Learning from Token-Level Distributional Deviations for LLM Reasoning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.AI","authors_text":"Bing Guo, Haoxi Li, Jie Zhang, Jingcai Guo, Song Guo, Xuanzhi Feng, Yuming Jiang, Zeyu Liu, Zhengyang Li","submitted_at":"2026-06-18T04:11:56Z","abstract_excerpt":"Reinforcement Learning with Verifiable Rewards (RLVR) has significantly advanced Large Language Model (LLM) reasoning; however, it faces a fundamental optimization instability: uniform token updates precipitate entropy collapse, leading to premature convergence to suboptimal strategies, whereas excessive Shannon Entropy maximization can cause entropy explosion, driving blind exploration toward incoherent reasoning chains. To resolve this dichotomy, we introduce the Independent Combinatorial Tokens (ICT) framework, which shifts the optimization focus from scalar uncertainty to the distributiona"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.19771","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2606.19771/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2606.19771","created_at":"2026-06-19T16:12:34.731864+00:00"},{"alias_kind":"arxiv_version","alias_value":"2606.19771v1","created_at":"2026-06-19T16:12:34.731864+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.19771","created_at":"2026-06-19T16:12:34.731864+00:00"},{"alias_kind":"pith_short_12","alias_value":"RXWBANRUYDCF","created_at":"2026-06-19T16:12:34.731864+00:00"},{"alias_kind":"pith_short_16","alias_value":"RXWBANRUYDCFJBQK","created_at":"2026-06-19T16:12:34.731864+00:00"},{"alias_kind":"pith_short_8","alias_value":"RXWBANRU","created_at":"2026-06-19T16:12:34.731864+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/RXWBANRUYDCFJBQKAIHRURPMW4","json":"https://pith.science/pith/RXWBANRUYDCFJBQKAIHRURPMW4.json","graph_json":"https://pith.science/api/pith-number/RXWBANRUYDCFJBQKAIHRURPMW4/graph.json","events_json":"https://pith.science/api/pith-number/RXWBANRUYDCFJBQKAIHRURPMW4/events.json","paper":"https://pith.science/paper/RXWBANRU"},"agent_actions":{"view_html":"https://pith.science/pith/RXWBANRUYDCFJBQKAIHRURPMW4","download_json":"https://pith.science/pith/RXWBANRUYDCFJBQKAIHRURPMW4.json","view_paper":"https://pith.science/paper/RXWBANRU","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2606.19771&json=true","fetch_graph":"https://pith.science/api/pith-number/RXWBANRUYDCFJBQKAIHRURPMW4/graph.json","fetch_events":"https://pith.science/api/pith-number/RXWBANRUYDCFJBQKAIHRURPMW4/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/RXWBANRUYDCFJBQKAIHRURPMW4/action/timestamp_anchor","attest_storage":"https://pith.science/pith/RXWBANRUYDCFJBQKAIHRURPMW4/action/storage_attestation","attest_author":"https://pith.science/pith/RXWBANRUYDCFJBQKAIHRURPMW4/action/author_attestation","sign_citation":"https://pith.science/pith/RXWBANRUYDCFJBQKAIHRURPMW4/action/citation_signature","submit_replication":"https://pith.science/pith/RXWBANRUYDCFJBQKAIHRURPMW4/action/replication_record"}},"created_at":"2026-06-19T16:12:34.731864+00:00","updated_at":"2026-06-19T16:12:34.731864+00:00"}