{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:3KXB7CHVKSC4J3LATO6KLL7DWU","short_pith_number":"pith:3KXB7CHV","schema_version":"1.0","canonical_sha256":"daae1f88f55485c4ed609bbca5afe3b527819fc5a5cc0dee838b6c22432d4b44","source":{"kind":"arxiv","id":"2605.18864","version":1},"attestation_state":"computed","paper":{"title":"SAGE: Shaping Anchors for Guided Exploration in RLVR of LLMs","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.AI","cs.CL"],"primary_cat":"cs.LG","authors_text":"Chanuk Lee, Minki Kang, Sung Ju Hwang","submitted_at":"2026-05-15T07:42:21Z","abstract_excerpt":"Recent studies observe that reinforcement learning with verifiable rewards (RLVR) reliably improves pass@1 on reasoning tasks, yet often fails to yield comparable gains in pass@k, raising the question of whether RLVR genuinely enables large language models to acquire novel reasoning abilities or merely enhances the efficiency of sampling reasoning modes already present in the base model. Prior analyses largely support the latter view, attributing this limitation to structural properties of standard RLVR objectives that result in insufficient exploration pressure. In this work, we argue that a "},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2605.18864","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-05-15T07:42:21Z","cross_cats_sorted":["cs.AI","cs.CL"],"title_canon_sha256":"a37bdcd0242029f2b73424435001f3f1d3bff5636b32aae9c0898d3ec21acdfc","abstract_canon_sha256":"92fa127f20e2720ff788e6f74d3afdc084565cc8edaf9bd025aec4b055eb3f90"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-20T00:06:29.209217Z","signature_b64":"D6W0RVKJwJMVm+R2+Kj4Lly5zJIPpEVZzpvGjJ70oztQp6qdvogMdazu/U9oqerrhMUPZdIZ+WAyp0A2FuHODg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"daae1f88f55485c4ed609bbca5afe3b527819fc5a5cc0dee838b6c22432d4b44","last_reissued_at":"2026-05-20T00:06:29.208416Z","signature_status":"signed_v1","first_computed_at":"2026-05-20T00:06:29.208416Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"SAGE: Shaping Anchors for Guided Exploration in RLVR of LLMs","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.AI","cs.CL"],"primary_cat":"cs.LG","authors_text":"Chanuk Lee, Minki Kang, Sung Ju Hwang","submitted_at":"2026-05-15T07:42:21Z","abstract_excerpt":"Recent studies observe that reinforcement learning with verifiable rewards (RLVR) reliably improves pass@1 on reasoning tasks, yet often fails to yield comparable gains in pass@k, raising the question of whether RLVR genuinely enables large language models to acquire novel reasoning abilities or merely enhances the efficiency of sampling reasoning modes already present in the base model. Prior analyses largely support the latter view, attributing this limitation to structural properties of standard RLVR objectives that result in insufficient exploration pressure. In this work, we argue that a "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.18864","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2605.18864/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2605.18864","created_at":"2026-05-20T00:06:29.208550+00:00"},{"alias_kind":"arxiv_version","alias_value":"2605.18864v1","created_at":"2026-05-20T00:06:29.208550+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.18864","created_at":"2026-05-20T00:06:29.208550+00:00"},{"alias_kind":"pith_short_12","alias_value":"3KXB7CHVKSC4","created_at":"2026-05-20T00:06:29.208550+00:00"},{"alias_kind":"pith_short_16","alias_value":"3KXB7CHVKSC4J3LA","created_at":"2026-05-20T00:06:29.208550+00:00"},{"alias_kind":"pith_short_8","alias_value":"3KXB7CHV","created_at":"2026-05-20T00:06:29.208550+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/3KXB7CHVKSC4J3LATO6KLL7DWU","json":"https://pith.science/pith/3KXB7CHVKSC4J3LATO6KLL7DWU.json","graph_json":"https://pith.science/api/pith-number/3KXB7CHVKSC4J3LATO6KLL7DWU/graph.json","events_json":"https://pith.science/api/pith-number/3KXB7CHVKSC4J3LATO6KLL7DWU/events.json","paper":"https://pith.science/paper/3KXB7CHV"},"agent_actions":{"view_html":"https://pith.science/pith/3KXB7CHVKSC4J3LATO6KLL7DWU","download_json":"https://pith.science/pith/3KXB7CHVKSC4J3LATO6KLL7DWU.json","view_paper":"https://pith.science/paper/3KXB7CHV","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2605.18864&json=true","fetch_graph":"https://pith.science/api/pith-number/3KXB7CHVKSC4J3LATO6KLL7DWU/graph.json","fetch_events":"https://pith.science/api/pith-number/3KXB7CHVKSC4J3LATO6KLL7DWU/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/3KXB7CHVKSC4J3LATO6KLL7DWU/action/timestamp_anchor","attest_storage":"https://pith.science/pith/3KXB7CHVKSC4J3LATO6KLL7DWU/action/storage_attestation","attest_author":"https://pith.science/pith/3KXB7CHVKSC4J3LATO6KLL7DWU/action/author_attestation","sign_citation":"https://pith.science/pith/3KXB7CHVKSC4J3LATO6KLL7DWU/action/citation_signature","submit_replication":"https://pith.science/pith/3KXB7CHVKSC4J3LATO6KLL7DWU/action/replication_record"}},"created_at":"2026-05-20T00:06:29.208550+00:00","updated_at":"2026-05-20T00:06:29.208550+00:00"}