{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2024:UHXBMDQASPG5XYGWXVOS6SX6RG","short_pith_number":"pith:UHXBMDQA","canonical_record":{"source":{"id":"2406.04244","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2024-06-06T16:41:39Z","cross_cats_sorted":[],"title_canon_sha256":"d142bdb5ee38989c04bd7e0b7d4be8412f8d73f7813cb9772b1a7b3bc8be4eef","abstract_canon_sha256":"bbd4335ebcbd8bfff3e05e631c12d0350c341ffbc1cef14a1b9a2650f73a44a5"},"schema_version":"1.0"},"canonical_sha256":"a1ee160e0093cddbe0d6bd5d2f4afe899f4f8a616859e8b6b59d1f38f69cac2f","source":{"kind":"arxiv","id":"2406.04244","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2406.04244","created_at":"2026-05-22T23:03:43Z"},{"alias_kind":"arxiv_version","alias_value":"2406.04244v1","created_at":"2026-05-22T23:03:43Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2406.04244","created_at":"2026-05-22T23:03:43Z"},{"alias_kind":"pith_short_12","alias_value":"UHXBMDQASPG5","created_at":"2026-05-22T23:03:43Z"},{"alias_kind":"pith_short_16","alias_value":"UHXBMDQASPG5XYGW","created_at":"2026-05-22T23:03:43Z"},{"alias_kind":"pith_short_8","alias_value":"UHXBMDQA","created_at":"2026-05-22T23:03:43Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2024:UHXBMDQASPG5XYGWXVOS6SX6RG","target":"record","payload":{"canonical_record":{"source":{"id":"2406.04244","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2024-06-06T16:41:39Z","cross_cats_sorted":[],"title_canon_sha256":"d142bdb5ee38989c04bd7e0b7d4be8412f8d73f7813cb9772b1a7b3bc8be4eef","abstract_canon_sha256":"bbd4335ebcbd8bfff3e05e631c12d0350c341ffbc1cef14a1b9a2650f73a44a5"},"schema_version":"1.0"},"canonical_sha256":"a1ee160e0093cddbe0d6bd5d2f4afe899f4f8a616859e8b6b59d1f38f69cac2f","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-22T23:03:43.315110Z","signature_b64":"vRmmV+PnUkmJSfRIsXUQV0t8VCzLNowJAeSn1nUATuP2tUhq37o3XVpx27ZkTFZzp8hMBFDUHhj0yp1tax9lBQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"a1ee160e0093cddbe0d6bd5d2f4afe899f4f8a616859e8b6b59d1f38f69cac2f","last_reissued_at":"2026-05-22T23:03:43.311884Z","signature_status":"signed_v1","first_computed_at":"2026-05-22T23:03:43.311884Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2406.04244","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-22T23:03:43Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"Li3K7oBrrxaIz/oLbK2OBR+j/Hr/cSXldLMrxjfBKdXsS+/0wkiGIvATPLS7Q/Q56H09gVgfzxunW2U+SpAsBg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-24T01:43:13.186888Z"},"content_sha256":"b1ca5bfb46faf7376a77c2a39435632ee299c0869ce0096e5c9ca2dbdcd93584","schema_version":"1.0","event_id":"sha256:b1ca5bfb46faf7376a77c2a39435632ee299c0869ce0096e5c9ca2dbdcd93584"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2024:UHXBMDQASPG5XYGWXVOS6SX6RG","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Benchmark Data Contamination of Large Language Models: A Survey","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Cheng Xu, Derek Greene, M-Tahar Kechadi, Shuhao Guan","submitted_at":"2024-06-06T16:41:39Z","abstract_excerpt":"The rapid development of Large Language Models (LLMs) like GPT-4, Claude-3, and Gemini has transformed the field of natural language processing. However, it has also resulted in a significant issue known as Benchmark Data Contamination (BDC). This occurs when language models inadvertently incorporate evaluation benchmark information from their training data, leading to inaccurate or unreliable performance during the evaluation phase of the process. This paper reviews the complex challenge of BDC in LLM evaluation and explores alternative assessment methods to mitigate the risks associated with"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2406.04244","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2406.04244/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-22T23:03:43Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"OFrOkUFcLGvzWux96skNmp9YxkXaZ4p5r22/VT5uv94UFggUC5mn/qNGNEB9qukpQIGBTQoRzwm5Lt18yQXyCw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-24T01:43:13.187671Z"},"content_sha256":"fd79d22c990e3deea4ba75a0c05cc678d42481e0e77d8029a429aa60144ce9bd","schema_version":"1.0","event_id":"sha256:fd79d22c990e3deea4ba75a0c05cc678d42481e0e77d8029a429aa60144ce9bd"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/UHXBMDQASPG5XYGWXVOS6SX6RG/bundle.json","state_url":"https://pith.science/pith/UHXBMDQASPG5XYGWXVOS6SX6RG/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/UHXBMDQASPG5XYGWXVOS6SX6RG/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-24T01:43:13Z","links":{"resolver":"https://pith.science/pith/UHXBMDQASPG5XYGWXVOS6SX6RG","bundle":"https://pith.science/pith/UHXBMDQASPG5XYGWXVOS6SX6RG/bundle.json","state":"https://pith.science/pith/UHXBMDQASPG5XYGWXVOS6SX6RG/state.json","well_known_bundle":"https://pith.science/.well-known/pith/UHXBMDQASPG5XYGWXVOS6SX6RG/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2024:UHXBMDQASPG5XYGWXVOS6SX6RG","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"bbd4335ebcbd8bfff3e05e631c12d0350c341ffbc1cef14a1b9a2650f73a44a5","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2024-06-06T16:41:39Z","title_canon_sha256":"d142bdb5ee38989c04bd7e0b7d4be8412f8d73f7813cb9772b1a7b3bc8be4eef"},"schema_version":"1.0","source":{"id":"2406.04244","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2406.04244","created_at":"2026-05-22T23:03:43Z"},{"alias_kind":"arxiv_version","alias_value":"2406.04244v1","created_at":"2026-05-22T23:03:43Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2406.04244","created_at":"2026-05-22T23:03:43Z"},{"alias_kind":"pith_short_12","alias_value":"UHXBMDQASPG5","created_at":"2026-05-22T23:03:43Z"},{"alias_kind":"pith_short_16","alias_value":"UHXBMDQASPG5XYGW","created_at":"2026-05-22T23:03:43Z"},{"alias_kind":"pith_short_8","alias_value":"UHXBMDQA","created_at":"2026-05-22T23:03:43Z"}],"graph_snapshots":[{"event_id":"sha256:fd79d22c990e3deea4ba75a0c05cc678d42481e0e77d8029a429aa60144ce9bd","target":"graph","created_at":"2026-05-22T23:03:43Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2406.04244/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"The rapid development of Large Language Models (LLMs) like GPT-4, Claude-3, and Gemini has transformed the field of natural language processing. However, it has also resulted in a significant issue known as Benchmark Data Contamination (BDC). This occurs when language models inadvertently incorporate evaluation benchmark information from their training data, leading to inaccurate or unreliable performance during the evaluation phase of the process. This paper reviews the complex challenge of BDC in LLM evaluation and explores alternative assessment methods to mitigate the risks associated with","authors_text":"Cheng Xu, Derek Greene, M-Tahar Kechadi, Shuhao Guan","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2024-06-06T16:41:39Z","title":"Benchmark Data Contamination of Large Language Models: A Survey"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2406.04244","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:b1ca5bfb46faf7376a77c2a39435632ee299c0869ce0096e5c9ca2dbdcd93584","target":"record","created_at":"2026-05-22T23:03:43Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"bbd4335ebcbd8bfff3e05e631c12d0350c341ffbc1cef14a1b9a2650f73a44a5","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2024-06-06T16:41:39Z","title_canon_sha256":"d142bdb5ee38989c04bd7e0b7d4be8412f8d73f7813cb9772b1a7b3bc8be4eef"},"schema_version":"1.0","source":{"id":"2406.04244","kind":"arxiv","version":1}},"canonical_sha256":"a1ee160e0093cddbe0d6bd5d2f4afe899f4f8a616859e8b6b59d1f38f69cac2f","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"a1ee160e0093cddbe0d6bd5d2f4afe899f4f8a616859e8b6b59d1f38f69cac2f","first_computed_at":"2026-05-22T23:03:43.311884Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-22T23:03:43.311884Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"vRmmV+PnUkmJSfRIsXUQV0t8VCzLNowJAeSn1nUATuP2tUhq37o3XVpx27ZkTFZzp8hMBFDUHhj0yp1tax9lBQ==","signature_status":"signed_v1","signed_at":"2026-05-22T23:03:43.315110Z","signed_message":"canonical_sha256_bytes"},"source_id":"2406.04244","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:b1ca5bfb46faf7376a77c2a39435632ee299c0869ce0096e5c9ca2dbdcd93584","sha256:fd79d22c990e3deea4ba75a0c05cc678d42481e0e77d8029a429aa60144ce9bd"],"state_sha256":"252f1aa145375f0e1e5c7b2d47e16e55929069b6e5ed1ffa02870ff64385780b"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"Jw+jlHFmxU5rzZGdHj9rXKjGukjBwm5OuvIEsjA4v+9mtXtLhuyaMggtAIIO5oS2NY8aV3OOkOTpCL7uNHc3Bg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-24T01:43:13.191603Z","bundle_sha256":"ed090fd1ca5dd9e1c62cbc5abdc69b44cf618a16a2f04612881daa7be9cd139e"}}