{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:RXZFZ5N3ESYVGGOJCSVPAVHINW","short_pith_number":"pith:RXZFZ5N3","canonical_record":{"source":{"id":"2604.10495","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-04-12T07:01:53Z","cross_cats_sorted":[],"title_canon_sha256":"7e608a9552dec70ce03be97dabeafa8fa001d42d473509ec005ec424e2d925df","abstract_canon_sha256":"894d519a90d57075d4a854cde420946df034c21aa770b37cb5d31ff32abda9c0"},"schema_version":"1.0"},"canonical_sha256":"8df25cf5bb24b15319c914aaf054e86db099d37ac14007921c607d58d5613ceb","source":{"kind":"arxiv","id":"2604.10495","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2604.10495","created_at":"2026-06-01T01:02:39Z"},{"alias_kind":"arxiv_version","alias_value":"2604.10495v2","created_at":"2026-06-01T01:02:39Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2604.10495","created_at":"2026-06-01T01:02:39Z"},{"alias_kind":"pith_short_12","alias_value":"RXZFZ5N3ESYV","created_at":"2026-06-01T01:02:39Z"},{"alias_kind":"pith_short_16","alias_value":"RXZFZ5N3ESYVGGOJ","created_at":"2026-06-01T01:02:39Z"},{"alias_kind":"pith_short_8","alias_value":"RXZFZ5N3","created_at":"2026-06-01T01:02:39Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:RXZFZ5N3ESYVGGOJCSVPAVHINW","target":"record","payload":{"canonical_record":{"source":{"id":"2604.10495","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-04-12T07:01:53Z","cross_cats_sorted":[],"title_canon_sha256":"7e608a9552dec70ce03be97dabeafa8fa001d42d473509ec005ec424e2d925df","abstract_canon_sha256":"894d519a90d57075d4a854cde420946df034c21aa770b37cb5d31ff32abda9c0"},"schema_version":"1.0"},"canonical_sha256":"8df25cf5bb24b15319c914aaf054e86db099d37ac14007921c607d58d5613ceb","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-01T01:02:39.495491Z","signature_b64":"aIBpf+wDRoHyIQCGem+XsOdgZk9o81SMKEZ34j+ZU5S9V/oh6YjXmF9YqrgG+RhGQSaB0Wz8DeaZKOIuZu4ACA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"8df25cf5bb24b15319c914aaf054e86db099d37ac14007921c607d58d5613ceb","last_reissued_at":"2026-06-01T01:02:39.494519Z","signature_status":"signed_v1","first_computed_at":"2026-06-01T01:02:39.494519Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2604.10495","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-01T01:02:39Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"p1WMs26AVaKaMtb9R0sBRqJODAVlNaoYk6ugOoQiHeDrSwruvhXDXg37va0f2pMXXWWp4PZMi+cqfJ9NU5INAA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-09T09:26:27.023592Z"},"content_sha256":"e013297c1cd917baa6170c9a3fbafc0bf9f99fad5dd1dabb61770b299ae2eb09","schema_version":"1.0","event_id":"sha256:e013297c1cd917baa6170c9a3fbafc0bf9f99fad5dd1dabb61770b299ae2eb09"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:RXZFZ5N3ESYVGGOJCSVPAVHINW","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Why Don't You Know? Evaluating the Impact of Uncertainty Sources on Uncertainty Quantification in LLMs","license":"http://creativecommons.org/licenses/by/4.0/","headline":"Uncertainty quantification methods for LLMs work only when uncertainty comes from knowledge gaps.","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Daniil Orel, Fedor Chernogorskii, Maiya Goloburda, Maxim Panov, Nurkhan Laiyk, Preslav Nakov, Roman Vashurin","submitted_at":"2026-04-12T07:01:53Z","abstract_excerpt":"As Large Language Models (LLMs) are increasingly deployed in real-world applications, reliable uncertainty quantification (UQ) becomes critical for safe and effective use. Most existing UQ approaches for language models aim to produce a single confidence score -- for example, estimating the probability that a model's answer is correct. However, uncertainty in natural language tasks arises from multiple distinct sources, including model knowledge gaps, output variability, and input ambiguity, which have different implications for system behavior and user interaction. In this work, we study how "},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"Our experiments reveal that while many UQ methods perform well when uncertainty stems solely from model knowledge limitations, their performance degrades or becomes misleading when other sources are introduced.","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"The introduced dataset cleanly separates uncertainty sources without significant label overlap or confounding factors, enabling controlled isolation of each source's effect on UQ performance.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"UQ methods for LLMs perform reliably only when uncertainty comes from model knowledge limits; they degrade or mislead when uncertainty arises from output variability or input ambiguity.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"Uncertainty quantification methods for LLMs work only when uncertainty comes from knowledge gaps.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"dd9d3883b28b4cbc7cdbf1509fc8693fc84b2a90ebff1e9e15700671003cf53f"},"source":{"id":"2604.10495","kind":"arxiv","version":2},"verdict":{"id":"71dfa6ed-0629-47af-a7ce-df05d24bdf78","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-10T15:48:10.381856Z","strongest_claim":"Our experiments reveal that while many UQ methods perform well when uncertainty stems solely from model knowledge limitations, their performance degrades or becomes misleading when other sources are introduced.","one_line_summary":"UQ methods for LLMs perform reliably only when uncertainty comes from model knowledge limits; they degrade or mislead when uncertainty arises from output variability or input ambiguity.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"The introduced dataset cleanly separates uncertainty sources without significant label overlap or confounding factors, enabling controlled isolation of each source's effect on UQ performance.","pith_extraction_headline":"Uncertainty quantification methods for LLMs work only when uncertainty comes from knowledge gaps."},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2604.10495/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":"71dfa6ed-0629-47af-a7ce-df05d24bdf78"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-01T01:02:39Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"Lwj21OPHcHt0tAn0KWXsKHNn/t3IfJvdCh4TKo66lRDCs5SvwBIM+5F6arAIj8h/isfkses2RVCcaI8k8zOwDA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-09T09:26:27.024772Z"},"content_sha256":"28010767769e0b349be4245ee4b28df667b1d34de8a523f387239ee22b68b6c9","schema_version":"1.0","event_id":"sha256:28010767769e0b349be4245ee4b28df667b1d34de8a523f387239ee22b68b6c9"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/RXZFZ5N3ESYVGGOJCSVPAVHINW/bundle.json","state_url":"https://pith.science/pith/RXZFZ5N3ESYVGGOJCSVPAVHINW/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/RXZFZ5N3ESYVGGOJCSVPAVHINW/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-09T09:26:27Z","links":{"resolver":"https://pith.science/pith/RXZFZ5N3ESYVGGOJCSVPAVHINW","bundle":"https://pith.science/pith/RXZFZ5N3ESYVGGOJCSVPAVHINW/bundle.json","state":"https://pith.science/pith/RXZFZ5N3ESYVGGOJCSVPAVHINW/state.json","well_known_bundle":"https://pith.science/.well-known/pith/RXZFZ5N3ESYVGGOJCSVPAVHINW/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:RXZFZ5N3ESYVGGOJCSVPAVHINW","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"894d519a90d57075d4a854cde420946df034c21aa770b37cb5d31ff32abda9c0","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-04-12T07:01:53Z","title_canon_sha256":"7e608a9552dec70ce03be97dabeafa8fa001d42d473509ec005ec424e2d925df"},"schema_version":"1.0","source":{"id":"2604.10495","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2604.10495","created_at":"2026-06-01T01:02:39Z"},{"alias_kind":"arxiv_version","alias_value":"2604.10495v2","created_at":"2026-06-01T01:02:39Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2604.10495","created_at":"2026-06-01T01:02:39Z"},{"alias_kind":"pith_short_12","alias_value":"RXZFZ5N3ESYV","created_at":"2026-06-01T01:02:39Z"},{"alias_kind":"pith_short_16","alias_value":"RXZFZ5N3ESYVGGOJ","created_at":"2026-06-01T01:02:39Z"},{"alias_kind":"pith_short_8","alias_value":"RXZFZ5N3","created_at":"2026-06-01T01:02:39Z"}],"graph_snapshots":[{"event_id":"sha256:28010767769e0b349be4245ee4b28df667b1d34de8a523f387239ee22b68b6c9","target":"graph","created_at":"2026-06-01T01:02:39Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"Our experiments reveal that while many UQ methods perform well when uncertainty stems solely from model knowledge limitations, their performance degrades or becomes misleading when other sources are introduced."},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"The introduced dataset cleanly separates uncertainty sources without significant label overlap or confounding factors, enabling controlled isolation of each source's effect on UQ performance."},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"UQ methods for LLMs perform reliably only when uncertainty comes from model knowledge limits; they degrade or mislead when uncertainty arises from output variability or input ambiguity."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"Uncertainty quantification methods for LLMs work only when uncertainty comes from knowledge gaps."}],"snapshot_sha256":"dd9d3883b28b4cbc7cdbf1509fc8693fc84b2a90ebff1e9e15700671003cf53f"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2604.10495/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"As Large Language Models (LLMs) are increasingly deployed in real-world applications, reliable uncertainty quantification (UQ) becomes critical for safe and effective use. Most existing UQ approaches for language models aim to produce a single confidence score -- for example, estimating the probability that a model's answer is correct. However, uncertainty in natural language tasks arises from multiple distinct sources, including model knowledge gaps, output variability, and input ambiguity, which have different implications for system behavior and user interaction. In this work, we study how ","authors_text":"Daniil Orel, Fedor Chernogorskii, Maiya Goloburda, Maxim Panov, Nurkhan Laiyk, Preslav Nakov, Roman Vashurin","cross_cats":[],"headline":"Uncertainty quantification methods for LLMs work only when uncertainty comes from knowledge gaps.","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-04-12T07:01:53Z","title":"Why Don't You Know? Evaluating the Impact of Uncertainty Sources on Uncertainty Quantification in LLMs"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2604.10495","kind":"arxiv","version":2},"verdict":{"created_at":"2026-05-10T15:48:10.381856Z","id":"71dfa6ed-0629-47af-a7ce-df05d24bdf78","model_set":{"reader":"grok-4.3"},"one_line_summary":"UQ methods for LLMs perform reliably only when uncertainty comes from model knowledge limits; they degrade or mislead when uncertainty arises from output variability or input ambiguity.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"Uncertainty quantification methods for LLMs work only when uncertainty comes from knowledge gaps.","strongest_claim":"Our experiments reveal that while many UQ methods perform well when uncertainty stems solely from model knowledge limitations, their performance degrades or becomes misleading when other sources are introduced.","weakest_assumption":"The introduced dataset cleanly separates uncertainty sources without significant label overlap or confounding factors, enabling controlled isolation of each source's effect on UQ performance."}},"verdict_id":"71dfa6ed-0629-47af-a7ce-df05d24bdf78"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:e013297c1cd917baa6170c9a3fbafc0bf9f99fad5dd1dabb61770b299ae2eb09","target":"record","created_at":"2026-06-01T01:02:39Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"894d519a90d57075d4a854cde420946df034c21aa770b37cb5d31ff32abda9c0","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-04-12T07:01:53Z","title_canon_sha256":"7e608a9552dec70ce03be97dabeafa8fa001d42d473509ec005ec424e2d925df"},"schema_version":"1.0","source":{"id":"2604.10495","kind":"arxiv","version":2}},"canonical_sha256":"8df25cf5bb24b15319c914aaf054e86db099d37ac14007921c607d58d5613ceb","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"8df25cf5bb24b15319c914aaf054e86db099d37ac14007921c607d58d5613ceb","first_computed_at":"2026-06-01T01:02:39.494519Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-06-01T01:02:39.494519Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"aIBpf+wDRoHyIQCGem+XsOdgZk9o81SMKEZ34j+ZU5S9V/oh6YjXmF9YqrgG+RhGQSaB0Wz8DeaZKOIuZu4ACA==","signature_status":"signed_v1","signed_at":"2026-06-01T01:02:39.495491Z","signed_message":"canonical_sha256_bytes"},"source_id":"2604.10495","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:e013297c1cd917baa6170c9a3fbafc0bf9f99fad5dd1dabb61770b299ae2eb09","sha256:28010767769e0b349be4245ee4b28df667b1d34de8a523f387239ee22b68b6c9"],"state_sha256":"7d1cd53e440ab2b3d3999777d19f7f6206c100bd651e2d6c315e5092056c9700"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"tjydBEUJrWRMMFoQpkWH5uVCQuTl3qM8acNoZwsJlHXLoz4M4Tof8CQJuIE3SOnMWC86gNhbVjn4dgTyM93WDg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-09T09:26:27.029391Z","bundle_sha256":"57f366dbccb87b9210102b9d55fe26872cfa654da39364807b07152ffee9e58c"}}