{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2022:BFPBRD4FKNT44DNQQDW5NDPNQJ","short_pith_number":"pith:BFPBRD4F","canonical_record":{"source":{"id":"2205.14334","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2022-05-28T05:02:31Z","cross_cats_sorted":["cs.AI","cs.LG"],"title_canon_sha256":"c0ee3606ce5cf484b933e412df2861c63b35d8078aa38ec6fbdc9c3ed3ce0a0e","abstract_canon_sha256":"77c7c1f6c18f4d94025f67e8ed0712faa7b963a1b015c5b72a76f2e1043da9fa"},"schema_version":"1.0"},"canonical_sha256":"095e188f855367ce0db080edd68ded82449f590a6aed7fe6047344d1874fd1e7","source":{"kind":"arxiv","id":"2205.14334","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2205.14334","created_at":"2026-05-17T23:38:47Z"},{"alias_kind":"arxiv_version","alias_value":"2205.14334v2","created_at":"2026-05-17T23:38:47Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2205.14334","created_at":"2026-05-17T23:38:47Z"},{"alias_kind":"pith_short_12","alias_value":"BFPBRD4FKNT4","created_at":"2026-05-18T12:33:33Z"},{"alias_kind":"pith_short_16","alias_value":"BFPBRD4FKNT44DNQ","created_at":"2026-05-18T12:33:33Z"},{"alias_kind":"pith_short_8","alias_value":"BFPBRD4F","created_at":"2026-05-18T12:33:33Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2022:BFPBRD4FKNT44DNQQDW5NDPNQJ","target":"record","payload":{"canonical_record":{"source":{"id":"2205.14334","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2022-05-28T05:02:31Z","cross_cats_sorted":["cs.AI","cs.LG"],"title_canon_sha256":"c0ee3606ce5cf484b933e412df2861c63b35d8078aa38ec6fbdc9c3ed3ce0a0e","abstract_canon_sha256":"77c7c1f6c18f4d94025f67e8ed0712faa7b963a1b015c5b72a76f2e1043da9fa"},"schema_version":"1.0"},"canonical_sha256":"095e188f855367ce0db080edd68ded82449f590a6aed7fe6047344d1874fd1e7","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:38:47.109448Z","signature_b64":"4C5vAgT8mYqXprLp1pl8QU8Bo3TpukYXRfKme85m6vIzRXAi+AahHTaaGdS6c05wnVfzEMVpAk1N5k6HiWPcCw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"095e188f855367ce0db080edd68ded82449f590a6aed7fe6047344d1874fd1e7","last_reissued_at":"2026-05-17T23:38:47.109011Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:38:47.109011Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2205.14334","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:38:47Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"JO9pKWnE6c9V1etEgvg0b8kLOIyfkYFXEWBCY0I3O93opaDW9VBi+UrDlYDqTL4ux+SDzMax5TL/HeM8U8M1Cg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-08T23:30:14.757592Z"},"content_sha256":"0c4240dd4c5b31c83013be6657ca8557a07576469da03b62b1ab095811d4160b","schema_version":"1.0","event_id":"sha256:0c4240dd4c5b31c83013be6657ca8557a07576469da03b62b1ab095811d4160b"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2022:BFPBRD4FKNT44DNQQDW5NDPNQJ","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Teaching Models to Express Their Uncertainty in Words","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"GPT-3 can learn to state its own uncertainty in natural language, and those statements map to well-calibrated probabilities.","cross_cats":["cs.AI","cs.LG"],"primary_cat":"cs.CL","authors_text":"Jacob Hilton, Owain Evans, Stephanie Lin","submitted_at":"2022-05-28T05:02:31Z","abstract_excerpt":"We show that a GPT-3 model can learn to express uncertainty about its own answers in natural language -- without use of model logits. When given a question, the model generates both an answer and a level of confidence (e.g. \"90% confidence\" or \"high confidence\"). These levels map to probabilities that are well calibrated. The model also remains moderately calibrated under distribution shift, and is sensitive to uncertainty in its own answers, rather than imitating human examples. To our knowledge, this is the first time a model has been shown to express calibrated uncertainty about its own ans"},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"a GPT-3 model can learn to express uncertainty about its own answers in natural language -- without use of model logits. These levels map to probabilities that are well calibrated.","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"That the verbalized confidence levels reflect the model's actual epistemic uncertainty rather than surface-level imitation of training examples or prompt patterns.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"GPT-3 can learn to express well-calibrated uncertainty about its answers using natural language phrases rather than logits.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"GPT-3 can learn to state its own uncertainty in natural language, and those statements map to well-calibrated probabilities.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"4c87f58e0363e4d908e834f650a08065ddeea34a46d0fa6f545f5840ae73d128"},"source":{"id":"2205.14334","kind":"arxiv","version":2},"verdict":{"id":"fe36c0ab-3d73-403c-bdcf-470acbce34fc","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-16T17:31:22.258256Z","strongest_claim":"a GPT-3 model can learn to express uncertainty about its own answers in natural language -- without use of model logits. These levels map to probabilities that are well calibrated.","one_line_summary":"GPT-3 can learn to express well-calibrated uncertainty about its answers using natural language phrases rather than logits.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"That the verbalized confidence levels reflect the model's actual epistemic uncertainty rather than surface-level imitation of training examples or prompt patterns.","pith_extraction_headline":"GPT-3 can learn to state its own uncertainty in natural language, and those statements map to well-calibrated probabilities."},"references":{"count":24,"sample":[{"doi":"","year":null,"title":"A General Language Assistant as a Laboratory for Alignment","work_id":"a43f9ea0-01be-47d5-b8ee-a1a9f73381c5","ref_index":1,"cited_arxiv_id":"2112.00861","is_internal_anchor":true},{"doi":"","year":2022,"title":"https://www.gwern.net/GPT-3-nonfiction# calibration, Last accessed on 2022-04-24. Tom B. Brown, Benjamin Mann, Nick Ryder, Melanie Subbiah, Jared Kaplan, Prafulla Dhariwal, Arvind Neelakantan, Pranav ","work_id":"1491d240-ec27-49af-8cc5-dee31d7f13a4","ref_index":2,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":null,"title":"PaLM: Scaling Language Modeling with Pathways","work_id":"a94f3ef7-2c49-4445-93fe-6ec16aafd966","ref_index":3,"cited_arxiv_id":"2204.02311","is_internal_anchor":true},{"doi":"","year":2022,"title":"Gabriela Csurka","work_id":"37812da7-1273-4dca-9164-57d9360ab2d1","ref_index":4,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2020,"title":"arXiv preprint arXiv:1702.05374 (2017) https://doi.org/10.1007/ 978-3-319-58347-1 1","work_id":"c5eee634-1361-4941-9ae4-4ab25ce09055","ref_index":5,"cited_arxiv_id":"1702.05374","is_internal_anchor":true}],"resolved_work":24,"snapshot_sha256":"f18db649e4fff15d4787a811d4b61750caf6d961111d22bdb91863bd3c0c0e9c","internal_anchors":7},"formal_canon":{"evidence_count":1,"snapshot_sha256":"ee5d5e3b1d89aa2b97ef6f7462c695e3be1b375c2522df17f8df98d4378c2992"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":"fe36c0ab-3d73-403c-bdcf-470acbce34fc"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:38:47Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"XJul26QrYDzfE0V4PCICxk/6qIW0tMTtPQFmoZtthQMIjnpxUbhQIMEhxfrkgStXJF1/kD9cM1PLAxIC5bOpBA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-08T23:30:14.758093Z"},"content_sha256":"5213f3ea0006ea658703f7843d7e49e95f4fab215547869a87948f81cef6ec89","schema_version":"1.0","event_id":"sha256:5213f3ea0006ea658703f7843d7e49e95f4fab215547869a87948f81cef6ec89"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/BFPBRD4FKNT44DNQQDW5NDPNQJ/bundle.json","state_url":"https://pith.science/pith/BFPBRD4FKNT44DNQQDW5NDPNQJ/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/BFPBRD4FKNT44DNQQDW5NDPNQJ/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-08T23:30:14Z","links":{"resolver":"https://pith.science/pith/BFPBRD4FKNT44DNQQDW5NDPNQJ","bundle":"https://pith.science/pith/BFPBRD4FKNT44DNQQDW5NDPNQJ/bundle.json","state":"https://pith.science/pith/BFPBRD4FKNT44DNQQDW5NDPNQJ/state.json","well_known_bundle":"https://pith.science/.well-known/pith/BFPBRD4FKNT44DNQQDW5NDPNQJ/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2022:BFPBRD4FKNT44DNQQDW5NDPNQJ","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"77c7c1f6c18f4d94025f67e8ed0712faa7b963a1b015c5b72a76f2e1043da9fa","cross_cats_sorted":["cs.AI","cs.LG"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2022-05-28T05:02:31Z","title_canon_sha256":"c0ee3606ce5cf484b933e412df2861c63b35d8078aa38ec6fbdc9c3ed3ce0a0e"},"schema_version":"1.0","source":{"id":"2205.14334","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2205.14334","created_at":"2026-05-17T23:38:47Z"},{"alias_kind":"arxiv_version","alias_value":"2205.14334v2","created_at":"2026-05-17T23:38:47Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2205.14334","created_at":"2026-05-17T23:38:47Z"},{"alias_kind":"pith_short_12","alias_value":"BFPBRD4FKNT4","created_at":"2026-05-18T12:33:33Z"},{"alias_kind":"pith_short_16","alias_value":"BFPBRD4FKNT44DNQ","created_at":"2026-05-18T12:33:33Z"},{"alias_kind":"pith_short_8","alias_value":"BFPBRD4F","created_at":"2026-05-18T12:33:33Z"}],"graph_snapshots":[{"event_id":"sha256:5213f3ea0006ea658703f7843d7e49e95f4fab215547869a87948f81cef6ec89","target":"graph","created_at":"2026-05-17T23:38:47Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"a GPT-3 model can learn to express uncertainty about its own answers in natural language -- without use of model logits. These levels map to probabilities that are well calibrated."},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"That the verbalized confidence levels reflect the model's actual epistemic uncertainty rather than surface-level imitation of training examples or prompt patterns."},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"GPT-3 can learn to express well-calibrated uncertainty about its answers using natural language phrases rather than logits."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"GPT-3 can learn to state its own uncertainty in natural language, and those statements map to well-calibrated probabilities."}],"snapshot_sha256":"4c87f58e0363e4d908e834f650a08065ddeea34a46d0fa6f545f5840ae73d128"},"formal_canon":{"evidence_count":1,"snapshot_sha256":"ee5d5e3b1d89aa2b97ef6f7462c695e3be1b375c2522df17f8df98d4378c2992"},"paper":{"abstract_excerpt":"We show that a GPT-3 model can learn to express uncertainty about its own answers in natural language -- without use of model logits. When given a question, the model generates both an answer and a level of confidence (e.g. \"90% confidence\" or \"high confidence\"). These levels map to probabilities that are well calibrated. The model also remains moderately calibrated under distribution shift, and is sensitive to uncertainty in its own answers, rather than imitating human examples. To our knowledge, this is the first time a model has been shown to express calibrated uncertainty about its own ans","authors_text":"Jacob Hilton, Owain Evans, Stephanie Lin","cross_cats":["cs.AI","cs.LG"],"headline":"GPT-3 can learn to state its own uncertainty in natural language, and those statements map to well-calibrated probabilities.","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2022-05-28T05:02:31Z","title":"Teaching Models to Express Their Uncertainty in Words"},"references":{"count":24,"internal_anchors":7,"resolved_work":24,"sample":[{"cited_arxiv_id":"2112.00861","doi":"","is_internal_anchor":true,"ref_index":1,"title":"A General Language Assistant as a Laboratory for Alignment","work_id":"a43f9ea0-01be-47d5-b8ee-a1a9f73381c5","year":null},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":2,"title":"https://www.gwern.net/GPT-3-nonfiction# calibration, Last accessed on 2022-04-24. Tom B. Brown, Benjamin Mann, Nick Ryder, Melanie Subbiah, Jared Kaplan, Prafulla Dhariwal, Arvind Neelakantan, Pranav ","work_id":"1491d240-ec27-49af-8cc5-dee31d7f13a4","year":2022},{"cited_arxiv_id":"2204.02311","doi":"","is_internal_anchor":true,"ref_index":3,"title":"PaLM: Scaling Language Modeling with Pathways","work_id":"a94f3ef7-2c49-4445-93fe-6ec16aafd966","year":null},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":4,"title":"Gabriela Csurka","work_id":"37812da7-1273-4dca-9164-57d9360ab2d1","year":2022},{"cited_arxiv_id":"1702.05374","doi":"","is_internal_anchor":true,"ref_index":5,"title":"arXiv preprint arXiv:1702.05374 (2017) https://doi.org/10.1007/ 978-3-319-58347-1 1","work_id":"c5eee634-1361-4941-9ae4-4ab25ce09055","year":2020}],"snapshot_sha256":"f18db649e4fff15d4787a811d4b61750caf6d961111d22bdb91863bd3c0c0e9c"},"source":{"id":"2205.14334","kind":"arxiv","version":2},"verdict":{"created_at":"2026-05-16T17:31:22.258256Z","id":"fe36c0ab-3d73-403c-bdcf-470acbce34fc","model_set":{"reader":"grok-4.3"},"one_line_summary":"GPT-3 can learn to express well-calibrated uncertainty about its answers using natural language phrases rather than logits.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"GPT-3 can learn to state its own uncertainty in natural language, and those statements map to well-calibrated probabilities.","strongest_claim":"a GPT-3 model can learn to express uncertainty about its own answers in natural language -- without use of model logits. These levels map to probabilities that are well calibrated.","weakest_assumption":"That the verbalized confidence levels reflect the model's actual epistemic uncertainty rather than surface-level imitation of training examples or prompt patterns."}},"verdict_id":"fe36c0ab-3d73-403c-bdcf-470acbce34fc"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:0c4240dd4c5b31c83013be6657ca8557a07576469da03b62b1ab095811d4160b","target":"record","created_at":"2026-05-17T23:38:47Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"77c7c1f6c18f4d94025f67e8ed0712faa7b963a1b015c5b72a76f2e1043da9fa","cross_cats_sorted":["cs.AI","cs.LG"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2022-05-28T05:02:31Z","title_canon_sha256":"c0ee3606ce5cf484b933e412df2861c63b35d8078aa38ec6fbdc9c3ed3ce0a0e"},"schema_version":"1.0","source":{"id":"2205.14334","kind":"arxiv","version":2}},"canonical_sha256":"095e188f855367ce0db080edd68ded82449f590a6aed7fe6047344d1874fd1e7","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"095e188f855367ce0db080edd68ded82449f590a6aed7fe6047344d1874fd1e7","first_computed_at":"2026-05-17T23:38:47.109011Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:38:47.109011Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"4C5vAgT8mYqXprLp1pl8QU8Bo3TpukYXRfKme85m6vIzRXAi+AahHTaaGdS6c05wnVfzEMVpAk1N5k6HiWPcCw==","signature_status":"signed_v1","signed_at":"2026-05-17T23:38:47.109448Z","signed_message":"canonical_sha256_bytes"},"source_id":"2205.14334","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:0c4240dd4c5b31c83013be6657ca8557a07576469da03b62b1ab095811d4160b","sha256:5213f3ea0006ea658703f7843d7e49e95f4fab215547869a87948f81cef6ec89"],"state_sha256":"dcc3c553c9ec8b0559f9ece4938f5d96674abeca6c0a04d7ccd58e640a63615b"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"m33vLW99Otm+IBmsNUjnMHctJwQRfC7vSV9kXY+VrFz0WmTJWN0f/AURf61PnovSErNT9tH9RNno6syGXw2fBw==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-08T23:30:14.760394Z","bundle_sha256":"67b9705cb96e4ee43ae9a6cb0f760d92020f05efb494823758208d052f36b26e"}}