{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2019:6BNL46B7OCSYIFKNLDM3UZCT6K","short_pith_number":"pith:6BNL46B7","canonical_record":{"source":{"id":"1901.11409","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2019-01-29T18:27:37Z","cross_cats_sorted":["cs.LG","stat.ML"],"title_canon_sha256":"0442dbe1a36520a60fbe2e813eb4b43835fb4136b11e7d417554b1c995460f95","abstract_canon_sha256":"42c709a89e262f483bebf0445c0a7c549ad2c3f8d4074c6b1560e41d488bb740"},"schema_version":"1.0"},"canonical_sha256":"f05abe783f70a584154d58d9ba6453f28d7cfe1927c18287540d7686b5fe8d67","source":{"kind":"arxiv","id":"1901.11409","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1901.11409","created_at":"2026-05-17T23:55:01Z"},{"alias_kind":"arxiv_version","alias_value":"1901.11409v1","created_at":"2026-05-17T23:55:01Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1901.11409","created_at":"2026-05-17T23:55:01Z"},{"alias_kind":"pith_short_12","alias_value":"6BNL46B7OCSY","created_at":"2026-05-18T12:33:10Z"},{"alias_kind":"pith_short_16","alias_value":"6BNL46B7OCSYIFKN","created_at":"2026-05-18T12:33:10Z"},{"alias_kind":"pith_short_8","alias_value":"6BNL46B7","created_at":"2026-05-18T12:33:10Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2019:6BNL46B7OCSYIFKNLDM3UZCT6K","target":"record","payload":{"canonical_record":{"source":{"id":"1901.11409","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2019-01-29T18:27:37Z","cross_cats_sorted":["cs.LG","stat.ML"],"title_canon_sha256":"0442dbe1a36520a60fbe2e813eb4b43835fb4136b11e7d417554b1c995460f95","abstract_canon_sha256":"42c709a89e262f483bebf0445c0a7c549ad2c3f8d4074c6b1560e41d488bb740"},"schema_version":"1.0"},"canonical_sha256":"f05abe783f70a584154d58d9ba6453f28d7cfe1927c18287540d7686b5fe8d67","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:55:01.930589Z","signature_b64":"ZUBZtTlg2Y0rM8OCjAnX1GSineFg+dn3e8YEZCma+Ec8cEQFiXR8HTsxNDUZFkXLd6qoXfMdBEuc810HQ5D4Ag==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"f05abe783f70a584154d58d9ba6453f28d7cfe1927c18287540d7686b5fe8d67","last_reissued_at":"2026-05-17T23:55:01.929994Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:55:01.929994Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1901.11409","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:55:01Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"nlABMWwqQiMAV3DRWjWj253Vi5QLlu6rUl7Y7P27l/znoAOqXBoY6z/0+Jc1FanaZnZEOMMe4rM4xXRRtctgDw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-26T18:05:30.515354Z"},"content_sha256":"9df2d29da0e1b7630d7fce8e410d7595f86e92ba7dad8bc313948cd63468b383","schema_version":"1.0","event_id":"sha256:9df2d29da0e1b7630d7fce8e410d7595f86e92ba7dad8bc313948cd63468b383"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2019:6BNL46B7OCSYIFKNLDM3UZCT6K","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Semantic Redundancies in Image-Classification Datasets: The 10% You Don't Need","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.LG","stat.ML"],"primary_cat":"cs.CV","authors_text":"Hossein Mobahi, Samy Bengio, Vighnesh Birodkar","submitted_at":"2019-01-29T18:27:37Z","abstract_excerpt":"Large datasets have been crucial to the success of deep learning models in the recent years, which keep performing better as they are trained with more labelled data. While there have been sustained efforts to make these models more data-efficient, the potential benefit of understanding the data itself, is largely untapped. Specifically, focusing on object recognition tasks, we wonder if for common benchmark datasets we can do better than random subsets of the data and find a subset that can generalize on par with the full dataset when trained on. To our knowledge, this is the first result tha"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1901.11409","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:55:01Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"zf4WmWNbFdRd7FjeSvHem3NF/cWmT/ETax8nt2V++ATUNPsm4cPpMpr3rs+e9ZYq/YKfw/3tjCXXGGTlGlZnCA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-26T18:05:30.516015Z"},"content_sha256":"947a1d8f73700a77ee98e05a2ccaf95e6ce03b3cb3a58b6d6bb4fa49dab03970","schema_version":"1.0","event_id":"sha256:947a1d8f73700a77ee98e05a2ccaf95e6ce03b3cb3a58b6d6bb4fa49dab03970"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/6BNL46B7OCSYIFKNLDM3UZCT6K/bundle.json","state_url":"https://pith.science/pith/6BNL46B7OCSYIFKNLDM3UZCT6K/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/6BNL46B7OCSYIFKNLDM3UZCT6K/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-26T18:05:30Z","links":{"resolver":"https://pith.science/pith/6BNL46B7OCSYIFKNLDM3UZCT6K","bundle":"https://pith.science/pith/6BNL46B7OCSYIFKNLDM3UZCT6K/bundle.json","state":"https://pith.science/pith/6BNL46B7OCSYIFKNLDM3UZCT6K/state.json","well_known_bundle":"https://pith.science/.well-known/pith/6BNL46B7OCSYIFKNLDM3UZCT6K/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2019:6BNL46B7OCSYIFKNLDM3UZCT6K","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"42c709a89e262f483bebf0445c0a7c549ad2c3f8d4074c6b1560e41d488bb740","cross_cats_sorted":["cs.LG","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2019-01-29T18:27:37Z","title_canon_sha256":"0442dbe1a36520a60fbe2e813eb4b43835fb4136b11e7d417554b1c995460f95"},"schema_version":"1.0","source":{"id":"1901.11409","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1901.11409","created_at":"2026-05-17T23:55:01Z"},{"alias_kind":"arxiv_version","alias_value":"1901.11409v1","created_at":"2026-05-17T23:55:01Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1901.11409","created_at":"2026-05-17T23:55:01Z"},{"alias_kind":"pith_short_12","alias_value":"6BNL46B7OCSY","created_at":"2026-05-18T12:33:10Z"},{"alias_kind":"pith_short_16","alias_value":"6BNL46B7OCSYIFKN","created_at":"2026-05-18T12:33:10Z"},{"alias_kind":"pith_short_8","alias_value":"6BNL46B7","created_at":"2026-05-18T12:33:10Z"}],"graph_snapshots":[{"event_id":"sha256:947a1d8f73700a77ee98e05a2ccaf95e6ce03b3cb3a58b6d6bb4fa49dab03970","target":"graph","created_at":"2026-05-17T23:55:01Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Large datasets have been crucial to the success of deep learning models in the recent years, which keep performing better as they are trained with more labelled data. While there have been sustained efforts to make these models more data-efficient, the potential benefit of understanding the data itself, is largely untapped. Specifically, focusing on object recognition tasks, we wonder if for common benchmark datasets we can do better than random subsets of the data and find a subset that can generalize on par with the full dataset when trained on. To our knowledge, this is the first result tha","authors_text":"Hossein Mobahi, Samy Bengio, Vighnesh Birodkar","cross_cats":["cs.LG","stat.ML"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2019-01-29T18:27:37Z","title":"Semantic Redundancies in Image-Classification Datasets: The 10% You Don't Need"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1901.11409","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:9df2d29da0e1b7630d7fce8e410d7595f86e92ba7dad8bc313948cd63468b383","target":"record","created_at":"2026-05-17T23:55:01Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"42c709a89e262f483bebf0445c0a7c549ad2c3f8d4074c6b1560e41d488bb740","cross_cats_sorted":["cs.LG","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2019-01-29T18:27:37Z","title_canon_sha256":"0442dbe1a36520a60fbe2e813eb4b43835fb4136b11e7d417554b1c995460f95"},"schema_version":"1.0","source":{"id":"1901.11409","kind":"arxiv","version":1}},"canonical_sha256":"f05abe783f70a584154d58d9ba6453f28d7cfe1927c18287540d7686b5fe8d67","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"f05abe783f70a584154d58d9ba6453f28d7cfe1927c18287540d7686b5fe8d67","first_computed_at":"2026-05-17T23:55:01.929994Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:55:01.929994Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"ZUBZtTlg2Y0rM8OCjAnX1GSineFg+dn3e8YEZCma+Ec8cEQFiXR8HTsxNDUZFkXLd6qoXfMdBEuc810HQ5D4Ag==","signature_status":"signed_v1","signed_at":"2026-05-17T23:55:01.930589Z","signed_message":"canonical_sha256_bytes"},"source_id":"1901.11409","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:9df2d29da0e1b7630d7fce8e410d7595f86e92ba7dad8bc313948cd63468b383","sha256:947a1d8f73700a77ee98e05a2ccaf95e6ce03b3cb3a58b6d6bb4fa49dab03970"],"state_sha256":"ec8748cf40b2f08976c04dc1e0a74eb5c95256587bf3b047ef39dd27e647c1eb"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"XLSaoJzdhh5SlJfTen5xbxABujGIp2U4+4v1fgEkCSPEoLDRbBPifAIvXuuwJb3zzitXAQRIg1yww8I4BM8fDw==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-26T18:05:30.519186Z","bundle_sha256":"47aa34081cd6956898570bcb51ab32e3d63344b5ccfe92883839f35f6508b170"}}