{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:AH6MEJKABSUSIU7BDTCQKBNC5A","short_pith_number":"pith:AH6MEJKA","canonical_record":{"source":{"id":"2605.13188","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"stat.ML","submitted_at":"2026-05-13T08:43:57Z","cross_cats_sorted":["cs.CL","cs.LG","stat.ME"],"title_canon_sha256":"fb4762aaf6ad55680c127267071bd7df8970c1c296c393105322566d9ca09b0a","abstract_canon_sha256":"d8343a4d402c456398c5031032a0a0a0835170a2dc296fe0a18ced6781180fca"},"schema_version":"1.0"},"canonical_sha256":"01fcc225400ca92453e11cc50505a2e805615ff309900e044d946cb3cab9aec7","source":{"kind":"arxiv","id":"2605.13188","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.13188","created_at":"2026-05-18T03:08:56Z"},{"alias_kind":"arxiv_version","alias_value":"2605.13188v1","created_at":"2026-05-18T03:08:56Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.13188","created_at":"2026-05-18T03:08:56Z"},{"alias_kind":"pith_short_12","alias_value":"AH6MEJKABSUS","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_16","alias_value":"AH6MEJKABSUSIU7B","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_8","alias_value":"AH6MEJKA","created_at":"2026-05-18T12:33:37Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:AH6MEJKABSUSIU7BDTCQKBNC5A","target":"record","payload":{"canonical_record":{"source":{"id":"2605.13188","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"stat.ML","submitted_at":"2026-05-13T08:43:57Z","cross_cats_sorted":["cs.CL","cs.LG","stat.ME"],"title_canon_sha256":"fb4762aaf6ad55680c127267071bd7df8970c1c296c393105322566d9ca09b0a","abstract_canon_sha256":"d8343a4d402c456398c5031032a0a0a0835170a2dc296fe0a18ced6781180fca"},"schema_version":"1.0"},"canonical_sha256":"01fcc225400ca92453e11cc50505a2e805615ff309900e044d946cb3cab9aec7","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T03:08:56.190193Z","signature_b64":"k4ft7kwarO1NgCQfBAEijiMrZSB+3vnJ8ovf20nKVQCG1PqmFlKao7R3BDGWG4VfedXYl22tZJX2a+kYRRypDQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"01fcc225400ca92453e11cc50505a2e805615ff309900e044d946cb3cab9aec7","last_reissued_at":"2026-05-18T03:08:56.189437Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T03:08:56.189437Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2605.13188","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T03:08:56Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"seR/O9q8UP/h6eFVwBkawe41eQLusKJw1045rTHZA/mQ6P77H1anpnWi9pgQQgQMZvdVn9Q4qiXlrbzZYHyMDw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-27T16:52:05.696463Z"},"content_sha256":"1fb45da7daaf1c42df6be74116d56eb43244d0ac2ec37469ebc8c172bcb6d502","schema_version":"1.0","event_id":"sha256:1fb45da7daaf1c42df6be74116d56eb43244d0ac2ec37469ebc8c172bcb6d502"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:AH6MEJKABSUSIU7BDTCQKBNC5A","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"LLMs as Implicit Imputers: Uncertainty Should Scale with Missing Information","license":"http://creativecommons.org/licenses/by/4.0/","headline":"LLMs should increase uncertainty as context is removed, with entropy scaling like in multiple imputation while confidence does not.","cross_cats":["cs.CL","cs.LG","stat.ME"],"primary_cat":"stat.ML","authors_text":"Stef van Buuren","submitted_at":"2026-05-13T08:43:57Z","abstract_excerpt":"Large language models (LLMs) are increasingly deployed in settings where the available context is incomplete or degraded. We argue that an LLM generating answers under incomplete context can be viewed as an implicit imputer, and evaluated against a criterion from the multiple imputation (MI) literature: uncertainty should scale with the amount of missing information. We assess this criterion on SQuAD, using a controlled framework in which context availability is varied across five levels. We evaluate two answer-level uncertainty measures that can be estimated from repeated sampling: sampling-b"},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"Entropy increases with context removal, consistent with the MI analogy, and explains substantially more variance in accuracy than confidence across all evidence levels (quadratic R² gap up to 0.057).","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"That controlled removal of context segments on SQuAD questions creates a representative proxy for the kinds of missing information LLMs encounter in open-ended real-world use.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"Response entropy in LLMs rises with missing context on SQuAD while sampling-based confidence stays high, supporting the multiple imputation criterion and introducing a diagnostic for uncertainty reduction by context level.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"LLMs should increase uncertainty as context is removed, with entropy scaling like in multiple imputation while confidence does not.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"dfc0de67da95a8b4f46285c13a5454b8d3d4bf4307f9e8c4300ced6d4d08b6df"},"source":{"id":"2605.13188","kind":"arxiv","version":1},"verdict":{"id":"c70d8f13-747d-497c-9828-25c5d21d2f01","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-14T17:53:30.545347Z","strongest_claim":"Entropy increases with context removal, consistent with the MI analogy, and explains substantially more variance in accuracy than confidence across all evidence levels (quadratic R² gap up to 0.057).","one_line_summary":"Response entropy in LLMs rises with missing context on SQuAD while sampling-based confidence stays high, supporting the multiple imputation criterion and introducing a diagnostic for uncertainty reduction by context level.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"That controlled removal of context segments on SQuAD questions creates a representative proxy for the kinds of missing information LLMs encounter in open-ended real-world use.","pith_extraction_headline":"LLMs should increase uncertainty as context is removed, with entropy scaling like in multiple imputation while confidence does not."},"references":{"count":12,"sample":[{"doi":"","year":2015,"title":"Bartlett, J. W. and Seaman, S. R. and White, I. R. and Carpenter, J. R. , title =. Statistical Methods in Medical Research , volume =. 2015 , location =","work_id":"f02110b5-2d25-4759-adb0-9f48691ae9bc","ref_index":1,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2017,"title":"International Conference on Machine Learning , pages=","work_id":"0d19dee1-1c05-4b63-8bdd-7a9cf30cfcd7","ref_index":2,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":null,"title":"Language Models (Mostly) Know What They Know","work_id":"8ca58a10-da41-4f70-baae-7e449512e345","ref_index":3,"cited_arxiv_id":"2207.05221","is_internal_anchor":true},{"doi":"","year":2016,"title":"Rajpurkar, P. and Zhang, J. and Lopyrev, K. and Liang, P. , booktitle =. 2016 , publisher =","work_id":"0e04c1c5-8f7c-44b4-91e8-1f79f81e060a","ref_index":4,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":null,"title":"Semantic Uncertainty: Linguistic Invariances for Uncertainty Estimation in Natural Language Generation","work_id":"d66d411b-c2c1-4cd6-8a6b-9fac872fa257","ref_index":5,"cited_arxiv_id":"2302.09664","is_internal_anchor":true}],"resolved_work":12,"snapshot_sha256":"a312d13f474b04f4e611e5632d2e6c009fae547e710140a99303c10e226f61a8","internal_anchors":2},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":"c70d8f13-747d-497c-9828-25c5d21d2f01"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T03:08:56Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"PHBcj35pkJobMXs+neaZMuGJfUxVZfZTNeWqq5MSRAUIWGYsmYgwxY4MWLyPrpXTFatjia59+FdRiZDeobdTBg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-27T16:52:05.697214Z"},"content_sha256":"06d1f936ee3fd4e58faed71c30b270df079617e9afe1ae9f6b374c78e2dd9753","schema_version":"1.0","event_id":"sha256:06d1f936ee3fd4e58faed71c30b270df079617e9afe1ae9f6b374c78e2dd9753"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/AH6MEJKABSUSIU7BDTCQKBNC5A/bundle.json","state_url":"https://pith.science/pith/AH6MEJKABSUSIU7BDTCQKBNC5A/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/AH6MEJKABSUSIU7BDTCQKBNC5A/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-27T16:52:05Z","links":{"resolver":"https://pith.science/pith/AH6MEJKABSUSIU7BDTCQKBNC5A","bundle":"https://pith.science/pith/AH6MEJKABSUSIU7BDTCQKBNC5A/bundle.json","state":"https://pith.science/pith/AH6MEJKABSUSIU7BDTCQKBNC5A/state.json","well_known_bundle":"https://pith.science/.well-known/pith/AH6MEJKABSUSIU7BDTCQKBNC5A/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:AH6MEJKABSUSIU7BDTCQKBNC5A","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"d8343a4d402c456398c5031032a0a0a0835170a2dc296fe0a18ced6781180fca","cross_cats_sorted":["cs.CL","cs.LG","stat.ME"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"stat.ML","submitted_at":"2026-05-13T08:43:57Z","title_canon_sha256":"fb4762aaf6ad55680c127267071bd7df8970c1c296c393105322566d9ca09b0a"},"schema_version":"1.0","source":{"id":"2605.13188","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.13188","created_at":"2026-05-18T03:08:56Z"},{"alias_kind":"arxiv_version","alias_value":"2605.13188v1","created_at":"2026-05-18T03:08:56Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.13188","created_at":"2026-05-18T03:08:56Z"},{"alias_kind":"pith_short_12","alias_value":"AH6MEJKABSUS","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_16","alias_value":"AH6MEJKABSUSIU7B","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_8","alias_value":"AH6MEJKA","created_at":"2026-05-18T12:33:37Z"}],"graph_snapshots":[{"event_id":"sha256:06d1f936ee3fd4e58faed71c30b270df079617e9afe1ae9f6b374c78e2dd9753","target":"graph","created_at":"2026-05-18T03:08:56Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"Entropy increases with context removal, consistent with the MI analogy, and explains substantially more variance in accuracy than confidence across all evidence levels (quadratic R² gap up to 0.057)."},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"That controlled removal of context segments on SQuAD questions creates a representative proxy for the kinds of missing information LLMs encounter in open-ended real-world use."},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"Response entropy in LLMs rises with missing context on SQuAD while sampling-based confidence stays high, supporting the multiple imputation criterion and introducing a diagnostic for uncertainty reduction by context level."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"LLMs should increase uncertainty as context is removed, with entropy scaling like in multiple imputation while confidence does not."}],"snapshot_sha256":"dfc0de67da95a8b4f46285c13a5454b8d3d4bf4307f9e8c4300ced6d4d08b6df"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Large language models (LLMs) are increasingly deployed in settings where the available context is incomplete or degraded. We argue that an LLM generating answers under incomplete context can be viewed as an implicit imputer, and evaluated against a criterion from the multiple imputation (MI) literature: uncertainty should scale with the amount of missing information. We assess this criterion on SQuAD, using a controlled framework in which context availability is varied across five levels. We evaluate two answer-level uncertainty measures that can be estimated from repeated sampling: sampling-b","authors_text":"Stef van Buuren","cross_cats":["cs.CL","cs.LG","stat.ME"],"headline":"LLMs should increase uncertainty as context is removed, with entropy scaling like in multiple imputation while confidence does not.","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"stat.ML","submitted_at":"2026-05-13T08:43:57Z","title":"LLMs as Implicit Imputers: Uncertainty Should Scale with Missing Information"},"references":{"count":12,"internal_anchors":2,"resolved_work":12,"sample":[{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":1,"title":"Bartlett, J. W. and Seaman, S. R. and White, I. R. and Carpenter, J. R. , title =. Statistical Methods in Medical Research , volume =. 2015 , location =","work_id":"f02110b5-2d25-4759-adb0-9f48691ae9bc","year":2015},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":2,"title":"International Conference on Machine Learning , pages=","work_id":"0d19dee1-1c05-4b63-8bdd-7a9cf30cfcd7","year":2017},{"cited_arxiv_id":"2207.05221","doi":"","is_internal_anchor":true,"ref_index":3,"title":"Language Models (Mostly) Know What They Know","work_id":"8ca58a10-da41-4f70-baae-7e449512e345","year":null},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":4,"title":"Rajpurkar, P. and Zhang, J. and Lopyrev, K. and Liang, P. , booktitle =. 2016 , publisher =","work_id":"0e04c1c5-8f7c-44b4-91e8-1f79f81e060a","year":2016},{"cited_arxiv_id":"2302.09664","doi":"","is_internal_anchor":true,"ref_index":5,"title":"Semantic Uncertainty: Linguistic Invariances for Uncertainty Estimation in Natural Language Generation","work_id":"d66d411b-c2c1-4cd6-8a6b-9fac872fa257","year":null}],"snapshot_sha256":"a312d13f474b04f4e611e5632d2e6c009fae547e710140a99303c10e226f61a8"},"source":{"id":"2605.13188","kind":"arxiv","version":1},"verdict":{"created_at":"2026-05-14T17:53:30.545347Z","id":"c70d8f13-747d-497c-9828-25c5d21d2f01","model_set":{"reader":"grok-4.3"},"one_line_summary":"Response entropy in LLMs rises with missing context on SQuAD while sampling-based confidence stays high, supporting the multiple imputation criterion and introducing a diagnostic for uncertainty reduction by context level.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"LLMs should increase uncertainty as context is removed, with entropy scaling like in multiple imputation while confidence does not.","strongest_claim":"Entropy increases with context removal, consistent with the MI analogy, and explains substantially more variance in accuracy than confidence across all evidence levels (quadratic R² gap up to 0.057).","weakest_assumption":"That controlled removal of context segments on SQuAD questions creates a representative proxy for the kinds of missing information LLMs encounter in open-ended real-world use."}},"verdict_id":"c70d8f13-747d-497c-9828-25c5d21d2f01"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:1fb45da7daaf1c42df6be74116d56eb43244d0ac2ec37469ebc8c172bcb6d502","target":"record","created_at":"2026-05-18T03:08:56Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"d8343a4d402c456398c5031032a0a0a0835170a2dc296fe0a18ced6781180fca","cross_cats_sorted":["cs.CL","cs.LG","stat.ME"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"stat.ML","submitted_at":"2026-05-13T08:43:57Z","title_canon_sha256":"fb4762aaf6ad55680c127267071bd7df8970c1c296c393105322566d9ca09b0a"},"schema_version":"1.0","source":{"id":"2605.13188","kind":"arxiv","version":1}},"canonical_sha256":"01fcc225400ca92453e11cc50505a2e805615ff309900e044d946cb3cab9aec7","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"01fcc225400ca92453e11cc50505a2e805615ff309900e044d946cb3cab9aec7","first_computed_at":"2026-05-18T03:08:56.189437Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T03:08:56.189437Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"k4ft7kwarO1NgCQfBAEijiMrZSB+3vnJ8ovf20nKVQCG1PqmFlKao7R3BDGWG4VfedXYl22tZJX2a+kYRRypDQ==","signature_status":"signed_v1","signed_at":"2026-05-18T03:08:56.190193Z","signed_message":"canonical_sha256_bytes"},"source_id":"2605.13188","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:1fb45da7daaf1c42df6be74116d56eb43244d0ac2ec37469ebc8c172bcb6d502","sha256:06d1f936ee3fd4e58faed71c30b270df079617e9afe1ae9f6b374c78e2dd9753"],"state_sha256":"7d76355718ced92fbef76bf5f072b9d382a325f8b77830149f63c35ca2d3685e"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"KvrZXDg5cpTM+GFOufHQ02vwgZzhZr5pzYzQbS2nQBr+01rRR4Dn3F6rO8WeNucF1JKwidVcMQuReRD7RJqXBg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-27T16:52:05.700867Z","bundle_sha256":"cf381735013933b58bf12374ba1e413419ac9b9257ce58ce3a133f908711f062"}}