{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:B54ARZQYA3MSFUGPAVB2KQQPCM","short_pith_number":"pith:B54ARZQY","schema_version":"1.0","canonical_sha256":"0f7808e61806d922d0cf0543a5420f1318e861a3e8e631c0d51a827bd684e9d1","source":{"kind":"arxiv","id":"2602.12642","version":2},"attestation_state":"computed","paper":{"title":"Beyond Normalization: Rethinking the Partition Function as a Difficulty Scheduler for RLVR","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.CL","authors_text":"Dohyung Kim, Jeonghye Kim, Kyomin Jung, Minbeom Kim, Sangmook Lee, Sojeong Rhee","submitted_at":"2026-02-13T06:04:14Z","abstract_excerpt":"Reward-maximizing RL methods have shown to be capable of enhancing the reasoning performance of LLMs, but often lead to reduced generation diversity. Recent works address this issue by adopting GFlowNets, training LLMs to match a target distribution while jointly learning its partition function. In contrast to prior works that treat this partition function solely as a normalizer, we reinterpret it as a per-prompt expected-reward (i.e., online accuracy) signal, leveraging this unused information to improve sample efficiency. Specifically, we first establish a theoretical relationship between th"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2602.12642","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-02-13T06:04:14Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"8917be4c8594ca12acf940fecd9af8a92fab34d3df6ce1695f22581b080c699c","abstract_canon_sha256":"c8834254ff7ab108975392a102bb3d26ee90e033dbd795be4f6c2d867b4b302e"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-29T01:05:04.620796Z","signature_b64":"5B7Estnk6A26Dq/2cfTe+P6DZIwThwgyEhzmOS4NvaIyQZM4kjNsJG1gTh5Z24qK5916+VCCtqmK6LeH0PCmDQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"0f7808e61806d922d0cf0543a5420f1318e861a3e8e631c0d51a827bd684e9d1","last_reissued_at":"2026-05-29T01:05:04.619898Z","signature_status":"signed_v1","first_computed_at":"2026-05-29T01:05:04.619898Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Beyond Normalization: Rethinking the Partition Function as a Difficulty Scheduler for RLVR","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.CL","authors_text":"Dohyung Kim, Jeonghye Kim, Kyomin Jung, Minbeom Kim, Sangmook Lee, Sojeong Rhee","submitted_at":"2026-02-13T06:04:14Z","abstract_excerpt":"Reward-maximizing RL methods have shown to be capable of enhancing the reasoning performance of LLMs, but often lead to reduced generation diversity. Recent works address this issue by adopting GFlowNets, training LLMs to match a target distribution while jointly learning its partition function. In contrast to prior works that treat this partition function solely as a normalizer, we reinterpret it as a per-prompt expected-reward (i.e., online accuracy) signal, leveraging this unused information to improve sample efficiency. Specifically, we first establish a theoretical relationship between th"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2602.12642","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2602.12642/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2602.12642","created_at":"2026-05-29T01:05:04.620009+00:00"},{"alias_kind":"arxiv_version","alias_value":"2602.12642v2","created_at":"2026-05-29T01:05:04.620009+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2602.12642","created_at":"2026-05-29T01:05:04.620009+00:00"},{"alias_kind":"pith_short_12","alias_value":"B54ARZQYA3MS","created_at":"2026-05-29T01:05:04.620009+00:00"},{"alias_kind":"pith_short_16","alias_value":"B54ARZQYA3MSFUGP","created_at":"2026-05-29T01:05:04.620009+00:00"},{"alias_kind":"pith_short_8","alias_value":"B54ARZQY","created_at":"2026-05-29T01:05:04.620009+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/B54ARZQYA3MSFUGPAVB2KQQPCM","json":"https://pith.science/pith/B54ARZQYA3MSFUGPAVB2KQQPCM.json","graph_json":"https://pith.science/api/pith-number/B54ARZQYA3MSFUGPAVB2KQQPCM/graph.json","events_json":"https://pith.science/api/pith-number/B54ARZQYA3MSFUGPAVB2KQQPCM/events.json","paper":"https://pith.science/paper/B54ARZQY"},"agent_actions":{"view_html":"https://pith.science/pith/B54ARZQYA3MSFUGPAVB2KQQPCM","download_json":"https://pith.science/pith/B54ARZQYA3MSFUGPAVB2KQQPCM.json","view_paper":"https://pith.science/paper/B54ARZQY","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2602.12642&json=true","fetch_graph":"https://pith.science/api/pith-number/B54ARZQYA3MSFUGPAVB2KQQPCM/graph.json","fetch_events":"https://pith.science/api/pith-number/B54ARZQYA3MSFUGPAVB2KQQPCM/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/B54ARZQYA3MSFUGPAVB2KQQPCM/action/timestamp_anchor","attest_storage":"https://pith.science/pith/B54ARZQYA3MSFUGPAVB2KQQPCM/action/storage_attestation","attest_author":"https://pith.science/pith/B54ARZQYA3MSFUGPAVB2KQQPCM/action/author_attestation","sign_citation":"https://pith.science/pith/B54ARZQYA3MSFUGPAVB2KQQPCM/action/citation_signature","submit_replication":"https://pith.science/pith/B54ARZQYA3MSFUGPAVB2KQQPCM/action/replication_record"}},"created_at":"2026-05-29T01:05:04.620009+00:00","updated_at":"2026-05-29T01:05:04.620009+00:00"}