{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:U62UDQVYCT42ORT674ES3WOGT5","short_pith_number":"pith:U62UDQVY","canonical_record":{"source":{"id":"2604.18419","kind":"arxiv","version":4},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-04-20T15:38:45Z","cross_cats_sorted":["cs.CL","stat.ML"],"title_canon_sha256":"df6fafc24c5b84e0d6a5f7fd1acc530d806a9e6f39dc0dd6a16cbf20101f576c","abstract_canon_sha256":"4a38d24792bc36c2a3fa74d9f3ec5de4e5130d01677fc8562d7f67b7db34990d"},"schema_version":"1.0"},"canonical_sha256":"a7b541c2b814f9a7467eff092dd9c69f70c64c238c36e3ac032cbc144bccd897","source":{"kind":"arxiv","id":"2604.18419","version":4},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2604.18419","created_at":"2026-05-26T02:05:09Z"},{"alias_kind":"arxiv_version","alias_value":"2604.18419v4","created_at":"2026-05-26T02:05:09Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2604.18419","created_at":"2026-05-26T02:05:09Z"},{"alias_kind":"pith_short_12","alias_value":"U62UDQVYCT42","created_at":"2026-05-26T02:05:09Z"},{"alias_kind":"pith_short_16","alias_value":"U62UDQVYCT42ORT6","created_at":"2026-05-26T02:05:09Z"},{"alias_kind":"pith_short_8","alias_value":"U62UDQVY","created_at":"2026-05-26T02:05:09Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:U62UDQVYCT42ORT674ES3WOGT5","target":"record","payload":{"canonical_record":{"source":{"id":"2604.18419","kind":"arxiv","version":4},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-04-20T15:38:45Z","cross_cats_sorted":["cs.CL","stat.ML"],"title_canon_sha256":"df6fafc24c5b84e0d6a5f7fd1acc530d806a9e6f39dc0dd6a16cbf20101f576c","abstract_canon_sha256":"4a38d24792bc36c2a3fa74d9f3ec5de4e5130d01677fc8562d7f67b7db34990d"},"schema_version":"1.0"},"canonical_sha256":"a7b541c2b814f9a7467eff092dd9c69f70c64c238c36e3ac032cbc144bccd897","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-26T02:05:09.542490Z","signature_b64":"d63hWgEUOLT0a7XyMRSxGVaDd8wu3C5qZtV92PR8vgXlUmpRWdQ7bCEfcHs/oq/tFEgRwZR687C9XpiTNBrtCA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"a7b541c2b814f9a7467eff092dd9c69f70c64c238c36e3ac032cbc144bccd897","last_reissued_at":"2026-05-26T02:05:09.541440Z","signature_status":"signed_v1","first_computed_at":"2026-05-26T02:05:09.541440Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2604.18419","source_version":4,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-26T02:05:09Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"CFyCETHcpCYPsWBRRKJedJr9JxLUKkb/6EtTeunNqPD0EmqA7L5Yo+9zQP6Bf3AFsj2tkmFyD3Jqt5A+MAC3Bw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-29T21:45:26.102133Z"},"content_sha256":"4e9afb641844d13b9e3240a60247987e035305fdd54713ae2f4e53a2cfd79525","schema_version":"1.0","event_id":"sha256:4e9afb641844d13b9e3240a60247987e035305fdd54713ae2f4e53a2cfd79525"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:U62UDQVYCT42ORT674ES3WOGT5","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Knowing When to Quit: A Principled Framework for Dynamic Abstention in LLM Reasoning","license":"http://creativecommons.org/licenses/by/4.0/","headline":"Modeling abstention as an RL action lets LLMs stop unpromising reasoning when value drops below reward","cross_cats":["cs.CL","stat.ML"],"primary_cat":"cs.LG","authors_text":"Guy Kushilevitz, Hen Davidov, Nachshon Cohen, Oren Kalinsky, Patrick Rebeschini, Ram Yazdi, Yaron Fairstein","submitted_at":"2026-04-20T15:38:45Z","abstract_excerpt":"LLMs utilizing chain-of-thought reasoning often waste substantial compute by producing long, incorrect responses. Abstention can mitigate this by withholding outputs unlikely to be correct. While most abstention methods decide to withhold outputs before or after generation, dynamic mid-generation abstention considers early termination of unpromising reasoning traces at each token position. Prior work has explored empirical variants of this idea, but principled guidance for the abstention rule remains lacking. We present a formal analysis of dynamic abstention for LLMs, modeling abstention as a"},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"We show that abstaining when the value function falls below this reward strictly outperforms natural baselines under general conditions.","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"That the value function can be approximated accurately enough during generation to make the threshold rule reliable, and that the regularized RL formulation faithfully captures the token-by-token decision process in real LLMs.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"A regularized RL framework for mid-generation abstention in LLMs shows that stopping when the value function falls below a reward threshold strictly improves selective accuracy over baselines.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"Modeling abstention as an RL action lets LLMs stop unpromising reasoning when value drops below reward","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"d04815540eb0fc109cc47aa7c1bcee9a8c79572062fc334f26128c9acab9449f"},"source":{"id":"2604.18419","kind":"arxiv","version":4},"verdict":{"id":"7e904fd3-a975-44c2-9733-7a1a911e824f","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-15T06:16:18.039847Z","strongest_claim":"We show that abstaining when the value function falls below this reward strictly outperforms natural baselines under general conditions.","one_line_summary":"A regularized RL framework for mid-generation abstention in LLMs shows that stopping when the value function falls below a reward threshold strictly improves selective accuracy over baselines.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"That the value function can be approximated accurately enough during generation to make the threshold rule reliable, and that the regularized RL formulation faithfully captures the token-by-token decision process in real LLMs.","pith_extraction_headline":"Modeling abstention as an RL action lets LLMs stop unpromising reasoning when value drops below reward"},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2604.18419/integrity.json","findings":[],"available":true,"detectors_run":[{"name":"doi_compliance","ran_at":"2026-05-20T04:02:03.267995Z","status":"completed","version":"1.0.0","findings_count":0}],"snapshot_sha256":"525279f026aa7e35995902af0eff0281a6719a2ae3b04252b33edf143d1121ad"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":"7e904fd3-a975-44c2-9733-7a1a911e824f"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-26T02:05:09Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"H2k/fLw1CsXVs+N3KnV0u2KAJNeQUAles9JfmJvv5D34CO4CU/C4BbKRcDhhtUFGBN70CsV3Df565BreUSR/CA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-29T21:45:26.102795Z"},"content_sha256":"87fd41eb9f904ddb43f96e60abebb4fd50ac0129d5e29d7ee577415c46532ad5","schema_version":"1.0","event_id":"sha256:87fd41eb9f904ddb43f96e60abebb4fd50ac0129d5e29d7ee577415c46532ad5"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/U62UDQVYCT42ORT674ES3WOGT5/bundle.json","state_url":"https://pith.science/pith/U62UDQVYCT42ORT674ES3WOGT5/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/U62UDQVYCT42ORT674ES3WOGT5/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-29T21:45:26Z","links":{"resolver":"https://pith.science/pith/U62UDQVYCT42ORT674ES3WOGT5","bundle":"https://pith.science/pith/U62UDQVYCT42ORT674ES3WOGT5/bundle.json","state":"https://pith.science/pith/U62UDQVYCT42ORT674ES3WOGT5/state.json","well_known_bundle":"https://pith.science/.well-known/pith/U62UDQVYCT42ORT674ES3WOGT5/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:U62UDQVYCT42ORT674ES3WOGT5","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"4a38d24792bc36c2a3fa74d9f3ec5de4e5130d01677fc8562d7f67b7db34990d","cross_cats_sorted":["cs.CL","stat.ML"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-04-20T15:38:45Z","title_canon_sha256":"df6fafc24c5b84e0d6a5f7fd1acc530d806a9e6f39dc0dd6a16cbf20101f576c"},"schema_version":"1.0","source":{"id":"2604.18419","kind":"arxiv","version":4}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2604.18419","created_at":"2026-05-26T02:05:09Z"},{"alias_kind":"arxiv_version","alias_value":"2604.18419v4","created_at":"2026-05-26T02:05:09Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2604.18419","created_at":"2026-05-26T02:05:09Z"},{"alias_kind":"pith_short_12","alias_value":"U62UDQVYCT42","created_at":"2026-05-26T02:05:09Z"},{"alias_kind":"pith_short_16","alias_value":"U62UDQVYCT42ORT6","created_at":"2026-05-26T02:05:09Z"},{"alias_kind":"pith_short_8","alias_value":"U62UDQVY","created_at":"2026-05-26T02:05:09Z"}],"graph_snapshots":[{"event_id":"sha256:87fd41eb9f904ddb43f96e60abebb4fd50ac0129d5e29d7ee577415c46532ad5","target":"graph","created_at":"2026-05-26T02:05:09Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"We show that abstaining when the value function falls below this reward strictly outperforms natural baselines under general conditions."},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"That the value function can be approximated accurately enough during generation to make the threshold rule reliable, and that the regularized RL formulation faithfully captures the token-by-token decision process in real LLMs."},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"A regularized RL framework for mid-generation abstention in LLMs shows that stopping when the value function falls below a reward threshold strictly improves selective accuracy over baselines."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"Modeling abstention as an RL action lets LLMs stop unpromising reasoning when value drops below reward"}],"snapshot_sha256":"d04815540eb0fc109cc47aa7c1bcee9a8c79572062fc334f26128c9acab9449f"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[{"findings_count":0,"name":"doi_compliance","ran_at":"2026-05-20T04:02:03.267995Z","status":"completed","version":"1.0.0"}],"endpoint":"/pith/2604.18419/integrity.json","findings":[],"snapshot_sha256":"525279f026aa7e35995902af0eff0281a6719a2ae3b04252b33edf143d1121ad","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"LLMs utilizing chain-of-thought reasoning often waste substantial compute by producing long, incorrect responses. Abstention can mitigate this by withholding outputs unlikely to be correct. While most abstention methods decide to withhold outputs before or after generation, dynamic mid-generation abstention considers early termination of unpromising reasoning traces at each token position. Prior work has explored empirical variants of this idea, but principled guidance for the abstention rule remains lacking. We present a formal analysis of dynamic abstention for LLMs, modeling abstention as a","authors_text":"Guy Kushilevitz, Hen Davidov, Nachshon Cohen, Oren Kalinsky, Patrick Rebeschini, Ram Yazdi, Yaron Fairstein","cross_cats":["cs.CL","stat.ML"],"headline":"Modeling abstention as an RL action lets LLMs stop unpromising reasoning when value drops below reward","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-04-20T15:38:45Z","title":"Knowing When to Quit: A Principled Framework for Dynamic Abstention in LLM Reasoning"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2604.18419","kind":"arxiv","version":4},"verdict":{"created_at":"2026-05-15T06:16:18.039847Z","id":"7e904fd3-a975-44c2-9733-7a1a911e824f","model_set":{"reader":"grok-4.3"},"one_line_summary":"A regularized RL framework for mid-generation abstention in LLMs shows that stopping when the value function falls below a reward threshold strictly improves selective accuracy over baselines.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"Modeling abstention as an RL action lets LLMs stop unpromising reasoning when value drops below reward","strongest_claim":"We show that abstaining when the value function falls below this reward strictly outperforms natural baselines under general conditions.","weakest_assumption":"That the value function can be approximated accurately enough during generation to make the threshold rule reliable, and that the regularized RL formulation faithfully captures the token-by-token decision process in real LLMs."}},"verdict_id":"7e904fd3-a975-44c2-9733-7a1a911e824f"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:4e9afb641844d13b9e3240a60247987e035305fdd54713ae2f4e53a2cfd79525","target":"record","created_at":"2026-05-26T02:05:09Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"4a38d24792bc36c2a3fa74d9f3ec5de4e5130d01677fc8562d7f67b7db34990d","cross_cats_sorted":["cs.CL","stat.ML"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-04-20T15:38:45Z","title_canon_sha256":"df6fafc24c5b84e0d6a5f7fd1acc530d806a9e6f39dc0dd6a16cbf20101f576c"},"schema_version":"1.0","source":{"id":"2604.18419","kind":"arxiv","version":4}},"canonical_sha256":"a7b541c2b814f9a7467eff092dd9c69f70c64c238c36e3ac032cbc144bccd897","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"a7b541c2b814f9a7467eff092dd9c69f70c64c238c36e3ac032cbc144bccd897","first_computed_at":"2026-05-26T02:05:09.541440Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-26T02:05:09.541440Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"d63hWgEUOLT0a7XyMRSxGVaDd8wu3C5qZtV92PR8vgXlUmpRWdQ7bCEfcHs/oq/tFEgRwZR687C9XpiTNBrtCA==","signature_status":"signed_v1","signed_at":"2026-05-26T02:05:09.542490Z","signed_message":"canonical_sha256_bytes"},"source_id":"2604.18419","source_kind":"arxiv","source_version":4}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:4e9afb641844d13b9e3240a60247987e035305fdd54713ae2f4e53a2cfd79525","sha256:87fd41eb9f904ddb43f96e60abebb4fd50ac0129d5e29d7ee577415c46532ad5"],"state_sha256":"b1b7734c6c88385356ff710ad46bf416b851e42a9a8692abbc53739b8c053b7e"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"ZkVhK/t/KAFtM1TEx9D2P0GgZ/QeJ4/kFP6ciDnERnT6LRhP+BzYhv/sR5YvnfGY7qqHF+ABR6WAOP9FJMiwDA==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-29T21:45:26.105646Z","bundle_sha256":"ebf148b61f740c59155e412eb347a20d61c326c3621b2eda577d9ca4db6fc9e4"}}