{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2025:FBNZSVAADCMHIAINYGJFFYTTUH","short_pith_number":"pith:FBNZSVAA","canonical_record":{"source":{"id":"2510.14901","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2025-10-16T17:18:11Z","cross_cats_sorted":["cs.AI","cs.CL"],"title_canon_sha256":"cea8936658e1fb416995205bc8324e264599d38a3883317244765200d3346c24","abstract_canon_sha256":"4ecb3b6cb04907b232cb999bb15cf3e1b01ea1ad94e7c536f7256b8d3dc1ee33"},"schema_version":"1.0"},"canonical_sha256":"285b995400189874010dc19252e273a1c0d274ea513fd36e9abf8f9ffc69d84e","source":{"kind":"arxiv","id":"2510.14901","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2510.14901","created_at":"2026-05-17T23:38:15Z"},{"alias_kind":"arxiv_version","alias_value":"2510.14901v1","created_at":"2026-05-17T23:38:15Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2510.14901","created_at":"2026-05-17T23:38:15Z"},{"alias_kind":"pith_short_12","alias_value":"FBNZSVAADCMH","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_16","alias_value":"FBNZSVAADCMHIAIN","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_8","alias_value":"FBNZSVAA","created_at":"2026-05-18T12:33:37Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2025:FBNZSVAADCMHIAINYGJFFYTTUH","target":"record","payload":{"canonical_record":{"source":{"id":"2510.14901","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2025-10-16T17:18:11Z","cross_cats_sorted":["cs.AI","cs.CL"],"title_canon_sha256":"cea8936658e1fb416995205bc8324e264599d38a3883317244765200d3346c24","abstract_canon_sha256":"4ecb3b6cb04907b232cb999bb15cf3e1b01ea1ad94e7c536f7256b8d3dc1ee33"},"schema_version":"1.0"},"canonical_sha256":"285b995400189874010dc19252e273a1c0d274ea513fd36e9abf8f9ffc69d84e","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:38:15.272477Z","signature_b64":"ogxGxWB92mPd9imEdFA+IoA5U0fKjjOukzqK6makh7B75AcTq4ELFx7htup1YGzm8d/awq/q8G1BlkNjE/hkAw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"285b995400189874010dc19252e273a1c0d274ea513fd36e9abf8f9ffc69d84e","last_reissued_at":"2026-05-17T23:38:15.272016Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:38:15.272016Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2510.14901","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:38:15Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"/vIzG7xuBM4+TfFWxHBnSvOJjT9fshsPbnyeQNqfs22a/z6RYAU/mAWpFBMt99yDQa8bLAlhIMt7B3Gs7Jm6AA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-03T07:45:46.589597Z"},"content_sha256":"b50b1ab84c32d735fe3808a1b9932ebd6650e2f650d8b8bc8dbd50859ad0e7ed","schema_version":"1.0","event_id":"sha256:b50b1ab84c32d735fe3808a1b9932ebd6650e2f650d8b8bc8dbd50859ad0e7ed"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2025:FBNZSVAADCMHIAINYGJFFYTTUH","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Reasoning with Sampling: Your Base Model is Smarter Than You Think","license":"http://creativecommons.org/licenses/by/4.0/","headline":"A simple iterative sampling algorithm using only a base model's likelihoods can elicit reasoning performance that nearly matches or exceeds reinforcement learning on tasks like math and coding.","cross_cats":["cs.AI","cs.CL"],"primary_cat":"cs.LG","authors_text":"Aayush Karan, Yilun Du","submitted_at":"2025-10-16T17:18:11Z","abstract_excerpt":"Frontier reasoning models have exhibited incredible capabilities across a wide array of disciplines, driven by posttraining large language models (LLMs) with reinforcement learning (RL). However, despite the widespread success of this paradigm, much of the literature has been devoted to disentangling truly novel behaviors that emerge during RL but are not present in the base models. In our work, we approach this question from a different angle, instead asking whether comparable reasoning capabilites can be elicited from base models at inference time by pure sampling, without any additional tra"},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"Over different base models, we show that our algorithm offers substantial boosts in reasoning that nearly match and even outperform those from RL on a wide variety of single-shot tasks, including MATH500, HumanEval, and GPQA.","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"That the base model's likelihoods contain sufficient signal to be iteratively reshaped into higher-quality reasoning trajectories via a simple MCMC-style sampler without any training or external verifier.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"An MCMC-inspired iterative sampler applied to base LLMs elicits reasoning performance that nearly matches or exceeds RL-posttrained models on MATH500, HumanEval, and GPQA while preserving output diversity.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"A simple iterative sampling algorithm using only a base model's likelihoods can elicit reasoning performance that nearly matches or exceeds reinforcement learning on tasks like math and coding.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"4c3db4abc89fc35ad229d37fba8da9f3e8a70103c81671eb1642f3b40242dd30"},"source":{"id":"2510.14901","kind":"arxiv","version":1},"verdict":{"id":"d35122ec-9601-4e85-bcb1-a7c4cbc56e1c","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-17T03:11:43.647449Z","strongest_claim":"Over different base models, we show that our algorithm offers substantial boosts in reasoning that nearly match and even outperform those from RL on a wide variety of single-shot tasks, including MATH500, HumanEval, and GPQA.","one_line_summary":"An MCMC-inspired iterative sampler applied to base LLMs elicits reasoning performance that nearly matches or exceeds RL-posttrained models on MATH500, HumanEval, and GPQA while preserving output diversity.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"That the base model's likelihoods contain sufficient signal to be iteratively reshaped into higher-quality reasoning trajectories via a simple MCMC-style sampler without any training or external verifier.","pith_extraction_headline":"A simple iterative sampling algorithm using only a base model's likelihoods can elicit reasoning performance that nearly matches or exceeds reinforcement learning on tasks like math and coding."},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":2,"snapshot_sha256":"7c0d59d9ed0e8b1a16e6e404271f51c1969b8b7b9b3aca1c17cee0d361c2ea9f"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":"d35122ec-9601-4e85-bcb1-a7c4cbc56e1c"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:38:15Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"3J8Cufdm7lVa2Sq30saGhnWyAOEtrKyQwtASVMq7kctg4RmKiYSAs7shJ3dGLqm8aSzXRU0wc/hTf73bUiSlDA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-03T07:45:46.590049Z"},"content_sha256":"160c82abf8e58902366c4e1ee41fac60a83e1c877af0b554e35c490b1baa766f","schema_version":"1.0","event_id":"sha256:160c82abf8e58902366c4e1ee41fac60a83e1c877af0b554e35c490b1baa766f"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/FBNZSVAADCMHIAINYGJFFYTTUH/bundle.json","state_url":"https://pith.science/pith/FBNZSVAADCMHIAINYGJFFYTTUH/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/FBNZSVAADCMHIAINYGJFFYTTUH/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-03T07:45:46Z","links":{"resolver":"https://pith.science/pith/FBNZSVAADCMHIAINYGJFFYTTUH","bundle":"https://pith.science/pith/FBNZSVAADCMHIAINYGJFFYTTUH/bundle.json","state":"https://pith.science/pith/FBNZSVAADCMHIAINYGJFFYTTUH/state.json","well_known_bundle":"https://pith.science/.well-known/pith/FBNZSVAADCMHIAINYGJFFYTTUH/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2025:FBNZSVAADCMHIAINYGJFFYTTUH","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"4ecb3b6cb04907b232cb999bb15cf3e1b01ea1ad94e7c536f7256b8d3dc1ee33","cross_cats_sorted":["cs.AI","cs.CL"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2025-10-16T17:18:11Z","title_canon_sha256":"cea8936658e1fb416995205bc8324e264599d38a3883317244765200d3346c24"},"schema_version":"1.0","source":{"id":"2510.14901","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2510.14901","created_at":"2026-05-17T23:38:15Z"},{"alias_kind":"arxiv_version","alias_value":"2510.14901v1","created_at":"2026-05-17T23:38:15Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2510.14901","created_at":"2026-05-17T23:38:15Z"},{"alias_kind":"pith_short_12","alias_value":"FBNZSVAADCMH","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_16","alias_value":"FBNZSVAADCMHIAIN","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_8","alias_value":"FBNZSVAA","created_at":"2026-05-18T12:33:37Z"}],"graph_snapshots":[{"event_id":"sha256:160c82abf8e58902366c4e1ee41fac60a83e1c877af0b554e35c490b1baa766f","target":"graph","created_at":"2026-05-17T23:38:15Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"Over different base models, we show that our algorithm offers substantial boosts in reasoning that nearly match and even outperform those from RL on a wide variety of single-shot tasks, including MATH500, HumanEval, and GPQA."},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"That the base model's likelihoods contain sufficient signal to be iteratively reshaped into higher-quality reasoning trajectories via a simple MCMC-style sampler without any training or external verifier."},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"An MCMC-inspired iterative sampler applied to base LLMs elicits reasoning performance that nearly matches or exceeds RL-posttrained models on MATH500, HumanEval, and GPQA while preserving output diversity."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"A simple iterative sampling algorithm using only a base model's likelihoods can elicit reasoning performance that nearly matches or exceeds reinforcement learning on tasks like math and coding."}],"snapshot_sha256":"4c3db4abc89fc35ad229d37fba8da9f3e8a70103c81671eb1642f3b40242dd30"},"formal_canon":{"evidence_count":2,"snapshot_sha256":"7c0d59d9ed0e8b1a16e6e404271f51c1969b8b7b9b3aca1c17cee0d361c2ea9f"},"paper":{"abstract_excerpt":"Frontier reasoning models have exhibited incredible capabilities across a wide array of disciplines, driven by posttraining large language models (LLMs) with reinforcement learning (RL). However, despite the widespread success of this paradigm, much of the literature has been devoted to disentangling truly novel behaviors that emerge during RL but are not present in the base models. In our work, we approach this question from a different angle, instead asking whether comparable reasoning capabilites can be elicited from base models at inference time by pure sampling, without any additional tra","authors_text":"Aayush Karan, Yilun Du","cross_cats":["cs.AI","cs.CL"],"headline":"A simple iterative sampling algorithm using only a base model's likelihoods can elicit reasoning performance that nearly matches or exceeds reinforcement learning on tasks like math and coding.","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2025-10-16T17:18:11Z","title":"Reasoning with Sampling: Your Base Model is Smarter Than You Think"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2510.14901","kind":"arxiv","version":1},"verdict":{"created_at":"2026-05-17T03:11:43.647449Z","id":"d35122ec-9601-4e85-bcb1-a7c4cbc56e1c","model_set":{"reader":"grok-4.3"},"one_line_summary":"An MCMC-inspired iterative sampler applied to base LLMs elicits reasoning performance that nearly matches or exceeds RL-posttrained models on MATH500, HumanEval, and GPQA while preserving output diversity.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"A simple iterative sampling algorithm using only a base model's likelihoods can elicit reasoning performance that nearly matches or exceeds reinforcement learning on tasks like math and coding.","strongest_claim":"Over different base models, we show that our algorithm offers substantial boosts in reasoning that nearly match and even outperform those from RL on a wide variety of single-shot tasks, including MATH500, HumanEval, and GPQA.","weakest_assumption":"That the base model's likelihoods contain sufficient signal to be iteratively reshaped into higher-quality reasoning trajectories via a simple MCMC-style sampler without any training or external verifier."}},"verdict_id":"d35122ec-9601-4e85-bcb1-a7c4cbc56e1c"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:b50b1ab84c32d735fe3808a1b9932ebd6650e2f650d8b8bc8dbd50859ad0e7ed","target":"record","created_at":"2026-05-17T23:38:15Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"4ecb3b6cb04907b232cb999bb15cf3e1b01ea1ad94e7c536f7256b8d3dc1ee33","cross_cats_sorted":["cs.AI","cs.CL"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2025-10-16T17:18:11Z","title_canon_sha256":"cea8936658e1fb416995205bc8324e264599d38a3883317244765200d3346c24"},"schema_version":"1.0","source":{"id":"2510.14901","kind":"arxiv","version":1}},"canonical_sha256":"285b995400189874010dc19252e273a1c0d274ea513fd36e9abf8f9ffc69d84e","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"285b995400189874010dc19252e273a1c0d274ea513fd36e9abf8f9ffc69d84e","first_computed_at":"2026-05-17T23:38:15.272016Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:38:15.272016Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"ogxGxWB92mPd9imEdFA+IoA5U0fKjjOukzqK6makh7B75AcTq4ELFx7htup1YGzm8d/awq/q8G1BlkNjE/hkAw==","signature_status":"signed_v1","signed_at":"2026-05-17T23:38:15.272477Z","signed_message":"canonical_sha256_bytes"},"source_id":"2510.14901","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:b50b1ab84c32d735fe3808a1b9932ebd6650e2f650d8b8bc8dbd50859ad0e7ed","sha256:160c82abf8e58902366c4e1ee41fac60a83e1c877af0b554e35c490b1baa766f"],"state_sha256":"d03844ebd47fa7aad49ddef62914f745c2a70d57b7515b594172bfa4f093b89b"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"ym7wpHfxTuH9UIpeMzPWwM7kh77REnPv28e6LNI3yGU9/lm6cXDSxZMutIs5fsJM8L/8pF37cQL7BbsN0CZTDw==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-03T07:45:46.592296Z","bundle_sha256":"da2e9588158e94cf4482b6ffa1844f9de308dcae030aff3163e71fbb1adc5f14"}}