{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2019:CKDXK7F2KCN5UBJGJ5YPZSXUEV","short_pith_number":"pith:CKDXK7F2","canonical_record":{"source":{"id":"1905.12580","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-05-29T16:54:37Z","cross_cats_sorted":["stat.ML"],"title_canon_sha256":"ff9986fb756c949f925caf9639de66616c055ef7d6c980ebbdf6eaba9d51c0af","abstract_canon_sha256":"f9dcdd20e1c2de2561f92735770e3cb947668f9b7cbf8c3e45f0d709bfa83657"},"schema_version":"1.0"},"canonical_sha256":"1287757cba509bda05264f70fccaf4255b8377129e3219e9f803ec6d83f9eb44","source":{"kind":"arxiv","id":"1905.12580","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1905.12580","created_at":"2026-05-17T23:44:43Z"},{"alias_kind":"arxiv_version","alias_value":"1905.12580v1","created_at":"2026-05-17T23:44:43Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1905.12580","created_at":"2026-05-17T23:44:43Z"},{"alias_kind":"pith_short_12","alias_value":"CKDXK7F2KCN5","created_at":"2026-05-18T12:33:15Z"},{"alias_kind":"pith_short_16","alias_value":"CKDXK7F2KCN5UBJG","created_at":"2026-05-18T12:33:15Z"},{"alias_kind":"pith_short_8","alias_value":"CKDXK7F2","created_at":"2026-05-18T12:33:15Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2019:CKDXK7F2KCN5UBJGJ5YPZSXUEV","target":"record","payload":{"canonical_record":{"source":{"id":"1905.12580","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-05-29T16:54:37Z","cross_cats_sorted":["stat.ML"],"title_canon_sha256":"ff9986fb756c949f925caf9639de66616c055ef7d6c980ebbdf6eaba9d51c0af","abstract_canon_sha256":"f9dcdd20e1c2de2561f92735770e3cb947668f9b7cbf8c3e45f0d709bfa83657"},"schema_version":"1.0"},"canonical_sha256":"1287757cba509bda05264f70fccaf4255b8377129e3219e9f803ec6d83f9eb44","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:44:43.922976Z","signature_b64":"2CbrZ0hxKJjPp+K6Y5Cz2z8+CDzDTICcxWcCpfFltFM6dD2mpKK7rVgP2E7btJImP6rSO8XDjodcWY6lGTh3BA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"1287757cba509bda05264f70fccaf4255b8377129e3219e9f803ec6d83f9eb44","last_reissued_at":"2026-05-17T23:44:43.922335Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:44:43.922335Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1905.12580","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:44:43Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"hmoLs5do4vLVEvP79rjmtDVr+ywIVfFzpfxz4m31o/T110mWaCxuL4KLG99PYWovJjMjl79x5f3/UqCJ9DwABA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-27T11:18:44.430716Z"},"content_sha256":"b26a976ab4c84001ca2455b40fa624dfcfa1089b52478f4d69bb2573d6525516","schema_version":"1.0","event_id":"sha256:b26a976ab4c84001ca2455b40fa624dfcfa1089b52478f4d69bb2573d6525516"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2019:CKDXK7F2KCN5UBJGJ5YPZSXUEV","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Model Similarity Mitigates Test Set Overuse","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["stat.ML"],"primary_cat":"cs.LG","authors_text":"Benjamin Recht, Horia Mania, John Miller, Ludwig Schmidt, Moritz Hardt","submitted_at":"2019-05-29T16:54:37Z","abstract_excerpt":"Excessive reuse of test data has become commonplace in today's machine learning workflows. Popular benchmarks, competitions, industrial scale tuning, among other applications, all involve test data reuse beyond guidance by statistical confidence bounds. Nonetheless, recent replication studies give evidence that popular benchmarks continue to support progress despite years of extensive reuse. We proffer a new explanation for the apparent longevity of test data: Many proposed models are similar in their predictions and we prove that this similarity mitigates overfitting. Specifically, we show em"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1905.12580","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:44:43Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"rtv5wzZLvFl81d4ypA/ggNFQA/B0Yoab4dIQ/Ly34m93lB1mfKwyg2aPqASHUIEZ7eLAwajIyDu3I8eKw2vVAQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-27T11:18:44.431371Z"},"content_sha256":"0f60ec8ee88c75a968312c660ba30e55294704819d2504ebf17e582db0c6b6fb","schema_version":"1.0","event_id":"sha256:0f60ec8ee88c75a968312c660ba30e55294704819d2504ebf17e582db0c6b6fb"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/CKDXK7F2KCN5UBJGJ5YPZSXUEV/bundle.json","state_url":"https://pith.science/pith/CKDXK7F2KCN5UBJGJ5YPZSXUEV/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/CKDXK7F2KCN5UBJGJ5YPZSXUEV/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-27T11:18:44Z","links":{"resolver":"https://pith.science/pith/CKDXK7F2KCN5UBJGJ5YPZSXUEV","bundle":"https://pith.science/pith/CKDXK7F2KCN5UBJGJ5YPZSXUEV/bundle.json","state":"https://pith.science/pith/CKDXK7F2KCN5UBJGJ5YPZSXUEV/state.json","well_known_bundle":"https://pith.science/.well-known/pith/CKDXK7F2KCN5UBJGJ5YPZSXUEV/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2019:CKDXK7F2KCN5UBJGJ5YPZSXUEV","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"f9dcdd20e1c2de2561f92735770e3cb947668f9b7cbf8c3e45f0d709bfa83657","cross_cats_sorted":["stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-05-29T16:54:37Z","title_canon_sha256":"ff9986fb756c949f925caf9639de66616c055ef7d6c980ebbdf6eaba9d51c0af"},"schema_version":"1.0","source":{"id":"1905.12580","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1905.12580","created_at":"2026-05-17T23:44:43Z"},{"alias_kind":"arxiv_version","alias_value":"1905.12580v1","created_at":"2026-05-17T23:44:43Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1905.12580","created_at":"2026-05-17T23:44:43Z"},{"alias_kind":"pith_short_12","alias_value":"CKDXK7F2KCN5","created_at":"2026-05-18T12:33:15Z"},{"alias_kind":"pith_short_16","alias_value":"CKDXK7F2KCN5UBJG","created_at":"2026-05-18T12:33:15Z"},{"alias_kind":"pith_short_8","alias_value":"CKDXK7F2","created_at":"2026-05-18T12:33:15Z"}],"graph_snapshots":[{"event_id":"sha256:0f60ec8ee88c75a968312c660ba30e55294704819d2504ebf17e582db0c6b6fb","target":"graph","created_at":"2026-05-17T23:44:43Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Excessive reuse of test data has become commonplace in today's machine learning workflows. Popular benchmarks, competitions, industrial scale tuning, among other applications, all involve test data reuse beyond guidance by statistical confidence bounds. Nonetheless, recent replication studies give evidence that popular benchmarks continue to support progress despite years of extensive reuse. We proffer a new explanation for the apparent longevity of test data: Many proposed models are similar in their predictions and we prove that this similarity mitigates overfitting. Specifically, we show em","authors_text":"Benjamin Recht, Horia Mania, John Miller, Ludwig Schmidt, Moritz Hardt","cross_cats":["stat.ML"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-05-29T16:54:37Z","title":"Model Similarity Mitigates Test Set Overuse"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1905.12580","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:b26a976ab4c84001ca2455b40fa624dfcfa1089b52478f4d69bb2573d6525516","target":"record","created_at":"2026-05-17T23:44:43Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"f9dcdd20e1c2de2561f92735770e3cb947668f9b7cbf8c3e45f0d709bfa83657","cross_cats_sorted":["stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-05-29T16:54:37Z","title_canon_sha256":"ff9986fb756c949f925caf9639de66616c055ef7d6c980ebbdf6eaba9d51c0af"},"schema_version":"1.0","source":{"id":"1905.12580","kind":"arxiv","version":1}},"canonical_sha256":"1287757cba509bda05264f70fccaf4255b8377129e3219e9f803ec6d83f9eb44","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"1287757cba509bda05264f70fccaf4255b8377129e3219e9f803ec6d83f9eb44","first_computed_at":"2026-05-17T23:44:43.922335Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:44:43.922335Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"2CbrZ0hxKJjPp+K6Y5Cz2z8+CDzDTICcxWcCpfFltFM6dD2mpKK7rVgP2E7btJImP6rSO8XDjodcWY6lGTh3BA==","signature_status":"signed_v1","signed_at":"2026-05-17T23:44:43.922976Z","signed_message":"canonical_sha256_bytes"},"source_id":"1905.12580","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:b26a976ab4c84001ca2455b40fa624dfcfa1089b52478f4d69bb2573d6525516","sha256:0f60ec8ee88c75a968312c660ba30e55294704819d2504ebf17e582db0c6b6fb"],"state_sha256":"a1567911f71bcf6ef3b9501190fb7d5e2790e3539879a3acbf4c0a7ca430e6aa"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"KTcd6yt3/dLDNOmXoYkhHwpDLVf417U8NhAh1PHBukYnWZ5uKTjZGPF4BLlifd6osFJzql1Ng9gDT/L/fKMVCg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-27T11:18:44.434992Z","bundle_sha256":"6b12dc8c7e5f9151926e81fd988097f7f50d427830a22edcf8bc3a8c943ba6a3"}}