{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2015:CXZEVYUT2HTKSFSB6BHV6KA7CL","short_pith_number":"pith:CXZEVYUT","schema_version":"1.0","canonical_sha256":"15f24ae293d1e6a91641f04f5f281f12c602a926073768f676807c05f91b08f8","source":{"kind":"arxiv","id":"1508.05154","version":2},"attestation_state":"computed","paper":{"title":"Posterior calibration and exploratory analysis for natural language processing models","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Brendan O'Connor, Khanh Nguyen","submitted_at":"2015-08-21T00:25:51Z","abstract_excerpt":"Many models in natural language processing define probabilistic distributions over linguistic structures. We argue that (1) the quality of a model' s posterior distribution can and should be directly evaluated, as to whether probabilities correspond to empirical frequencies, and (2) NLP uncertainty can be projected not only to pipeline components, but also to exploratory data analysis, telling a user when to trust and not trust the NLP analysis. We present a method to analyze calibration, and apply it to compare the miscalibration of several commonly used models. We also contribute a coreferen"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1508.05154","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2015-08-21T00:25:51Z","cross_cats_sorted":[],"title_canon_sha256":"701c1b80c9e61eff2b60055f3b1d32ed4141cb67184be312e228a212e612962f","abstract_canon_sha256":"c21d32c2aa276dc12f7b21366b49e5008ed7cfd4370da20ac3e88312e4a779ea"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T01:34:07.139522Z","signature_b64":"7VKYW+F6wuFAKMG/EO8YE8DcSeABb8p+K2CuWnoitLc5GtYti7HuNDXwW2LT3BxMy6Lqycf35PtgkfAwkMIKBQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"15f24ae293d1e6a91641f04f5f281f12c602a926073768f676807c05f91b08f8","last_reissued_at":"2026-05-18T01:34:07.139066Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T01:34:07.139066Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Posterior calibration and exploratory analysis for natural language processing models","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Brendan O'Connor, Khanh Nguyen","submitted_at":"2015-08-21T00:25:51Z","abstract_excerpt":"Many models in natural language processing define probabilistic distributions over linguistic structures. We argue that (1) the quality of a model' s posterior distribution can and should be directly evaluated, as to whether probabilities correspond to empirical frequencies, and (2) NLP uncertainty can be projected not only to pipeline components, but also to exploratory data analysis, telling a user when to trust and not trust the NLP analysis. We present a method to analyze calibration, and apply it to compare the miscalibration of several commonly used models. We also contribute a coreferen"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1508.05154","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1508.05154","created_at":"2026-05-18T01:34:07.139131+00:00"},{"alias_kind":"arxiv_version","alias_value":"1508.05154v2","created_at":"2026-05-18T01:34:07.139131+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1508.05154","created_at":"2026-05-18T01:34:07.139131+00:00"},{"alias_kind":"pith_short_12","alias_value":"CXZEVYUT2HTK","created_at":"2026-05-18T12:29:17.054201+00:00"},{"alias_kind":"pith_short_16","alias_value":"CXZEVYUT2HTKSFSB","created_at":"2026-05-18T12:29:17.054201+00:00"},{"alias_kind":"pith_short_8","alias_value":"CXZEVYUT","created_at":"2026-05-18T12:29:17.054201+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":2,"internal_anchor_count":0,"sample":[{"citing_arxiv_id":"2604.06689","citing_title":"Generative Cross-Entropy: A Strictly Proper Loss for Data-Efficient Classification","ref_index":5,"is_internal_anchor":false},{"citing_arxiv_id":"2604.06689","citing_title":"Generative Cross-Entropy: A Strictly Proper Loss for Data-Efficient Classification","ref_index":5,"is_internal_anchor":false}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/CXZEVYUT2HTKSFSB6BHV6KA7CL","json":"https://pith.science/pith/CXZEVYUT2HTKSFSB6BHV6KA7CL.json","graph_json":"https://pith.science/api/pith-number/CXZEVYUT2HTKSFSB6BHV6KA7CL/graph.json","events_json":"https://pith.science/api/pith-number/CXZEVYUT2HTKSFSB6BHV6KA7CL/events.json","paper":"https://pith.science/paper/CXZEVYUT"},"agent_actions":{"view_html":"https://pith.science/pith/CXZEVYUT2HTKSFSB6BHV6KA7CL","download_json":"https://pith.science/pith/CXZEVYUT2HTKSFSB6BHV6KA7CL.json","view_paper":"https://pith.science/paper/CXZEVYUT","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1508.05154&json=true","fetch_graph":"https://pith.science/api/pith-number/CXZEVYUT2HTKSFSB6BHV6KA7CL/graph.json","fetch_events":"https://pith.science/api/pith-number/CXZEVYUT2HTKSFSB6BHV6KA7CL/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/CXZEVYUT2HTKSFSB6BHV6KA7CL/action/timestamp_anchor","attest_storage":"https://pith.science/pith/CXZEVYUT2HTKSFSB6BHV6KA7CL/action/storage_attestation","attest_author":"https://pith.science/pith/CXZEVYUT2HTKSFSB6BHV6KA7CL/action/author_attestation","sign_citation":"https://pith.science/pith/CXZEVYUT2HTKSFSB6BHV6KA7CL/action/citation_signature","submit_replication":"https://pith.science/pith/CXZEVYUT2HTKSFSB6BHV6KA7CL/action/replication_record"}},"created_at":"2026-05-18T01:34:07.139131+00:00","updated_at":"2026-05-18T01:34:07.139131+00:00"}