{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:QAR5MQYLBXCQYNEHM5KGHJSF4P","short_pith_number":"pith:QAR5MQYL","schema_version":"1.0","canonical_sha256":"8023d6430b0dc50c3487675463a645e3d66f8021ce2e2426ce396b056141ea89","source":{"kind":"arxiv","id":"2606.23313","version":1},"attestation_state":"computed","paper":{"title":"Uncertainty-based Debiasing and Unlearning for Decontamination","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.CL"],"primary_cat":"cs.CY","authors_text":"Guangzhi Sun, Mark Gales, Xiao Zhan","submitted_at":"2026-06-22T13:26:29Z","abstract_excerpt":"Benchmark-based evaluation is the dominant paradigm for assessing large language model (LLM) capabilities, yet data contamination inflates reported performance and undermines fair comparison. Existing decontamination methods are evaluated solely through aggregate accuracy, which can obscure substantial differences in per-sample model behaviour, and many require access to an uncontaminated model. In this paper, we propose a sample-level evaluation framework for decontamination that complements accuracy-based assessment with distributional distance metrics, measuring how closely a decontaminated"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2606.23313","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CY","submitted_at":"2026-06-22T13:26:29Z","cross_cats_sorted":["cs.CL"],"title_canon_sha256":"ea901f8831d1b5244f7a56e8d77ee85e5c6ff3faa25edcc16828d39e45e2ec98","abstract_canon_sha256":"764ae675698d41d43cacb314f65c1594ccdca5c3c3c942d2ff62210586b68316"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-23T03:14:16.604403Z","signature_b64":"unUYqPDznkZauIHNHWS5kCKbdk4u0hxmD1b7HezW51W5EnSTwzyq1q3xN/WKqHmjMvDIAl6DXeuRPn0hnl/XAg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"8023d6430b0dc50c3487675463a645e3d66f8021ce2e2426ce396b056141ea89","last_reissued_at":"2026-06-23T03:14:16.604060Z","signature_status":"signed_v1","first_computed_at":"2026-06-23T03:14:16.604060Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Uncertainty-based Debiasing and Unlearning for Decontamination","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.CL"],"primary_cat":"cs.CY","authors_text":"Guangzhi Sun, Mark Gales, Xiao Zhan","submitted_at":"2026-06-22T13:26:29Z","abstract_excerpt":"Benchmark-based evaluation is the dominant paradigm for assessing large language model (LLM) capabilities, yet data contamination inflates reported performance and undermines fair comparison. Existing decontamination methods are evaluated solely through aggregate accuracy, which can obscure substantial differences in per-sample model behaviour, and many require access to an uncontaminated model. In this paper, we propose a sample-level evaluation framework for decontamination that complements accuracy-based assessment with distributional distance metrics, measuring how closely a decontaminated"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.23313","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2606.23313/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2606.23313","created_at":"2026-06-23T03:14:16.604114+00:00"},{"alias_kind":"arxiv_version","alias_value":"2606.23313v1","created_at":"2026-06-23T03:14:16.604114+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.23313","created_at":"2026-06-23T03:14:16.604114+00:00"},{"alias_kind":"pith_short_12","alias_value":"QAR5MQYLBXCQ","created_at":"2026-06-23T03:14:16.604114+00:00"},{"alias_kind":"pith_short_16","alias_value":"QAR5MQYLBXCQYNEH","created_at":"2026-06-23T03:14:16.604114+00:00"},{"alias_kind":"pith_short_8","alias_value":"QAR5MQYL","created_at":"2026-06-23T03:14:16.604114+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/QAR5MQYLBXCQYNEHM5KGHJSF4P","json":"https://pith.science/pith/QAR5MQYLBXCQYNEHM5KGHJSF4P.json","graph_json":"https://pith.science/api/pith-number/QAR5MQYLBXCQYNEHM5KGHJSF4P/graph.json","events_json":"https://pith.science/api/pith-number/QAR5MQYLBXCQYNEHM5KGHJSF4P/events.json","paper":"https://pith.science/paper/QAR5MQYL"},"agent_actions":{"view_html":"https://pith.science/pith/QAR5MQYLBXCQYNEHM5KGHJSF4P","download_json":"https://pith.science/pith/QAR5MQYLBXCQYNEHM5KGHJSF4P.json","view_paper":"https://pith.science/paper/QAR5MQYL","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2606.23313&json=true","fetch_graph":"https://pith.science/api/pith-number/QAR5MQYLBXCQYNEHM5KGHJSF4P/graph.json","fetch_events":"https://pith.science/api/pith-number/QAR5MQYLBXCQYNEHM5KGHJSF4P/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/QAR5MQYLBXCQYNEHM5KGHJSF4P/action/timestamp_anchor","attest_storage":"https://pith.science/pith/QAR5MQYLBXCQYNEHM5KGHJSF4P/action/storage_attestation","attest_author":"https://pith.science/pith/QAR5MQYLBXCQYNEHM5KGHJSF4P/action/author_attestation","sign_citation":"https://pith.science/pith/QAR5MQYLBXCQYNEHM5KGHJSF4P/action/citation_signature","submit_replication":"https://pith.science/pith/QAR5MQYLBXCQYNEHM5KGHJSF4P/action/replication_record"}},"created_at":"2026-06-23T03:14:16.604114+00:00","updated_at":"2026-06-23T03:14:16.604114+00:00"}