{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2018:ORMB5QL25ELHY6N64A7DXYSAK5","short_pith_number":"pith:ORMB5QL2","schema_version":"1.0","canonical_sha256":"74581ec17ae9167c79bee03e3be240577328e6d5c16121a446c6af50fd9e514d","source":{"kind":"arxiv","id":"1811.10959","version":3},"attestation_state":"computed","paper":{"title":"Dataset Distillation","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["stat.ML"],"primary_cat":"cs.LG","authors_text":"Alexei A. Efros, Antonio Torralba, Jun-Yan Zhu, Tongzhou Wang","submitted_at":"2018-11-27T13:17:45Z","abstract_excerpt":"Model distillation aims to distill the knowledge of a complex model into a simpler one. In this paper, we consider an alternative formulation called dataset distillation: we keep the model fixed and instead attempt to distill the knowledge from a large training dataset into a small one. The idea is to synthesize a small number of data points that do not need to come from the correct data distribution, but will, when given to the learning algorithm as training data, approximate the model trained on the original data. For example, we show that it is possible to compress 60,000 MNIST training ima"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1811.10959","kind":"arxiv","version":3},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-11-27T13:17:45Z","cross_cats_sorted":["stat.ML"],"title_canon_sha256":"9691edf58195baa3cfccc4ef7ac6949132ae091e926b04ed4f72ef7f5eb7fcb0","abstract_canon_sha256":"59490898f61ac21480e0dfe3cf1f9299c4360b54fcd3c64d8120b01143734313"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-23T23:50:24.043297Z","signature_b64":"LK8FLAzTSE7ZNlve6bu0CTVzLoAhpJNDEB3QOEUuvWojgKamJgZWqOHh+8jJBec9a2rTL1II3CdIcfNzRtEsDA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"74581ec17ae9167c79bee03e3be240577328e6d5c16121a446c6af50fd9e514d","last_reissued_at":"2026-05-23T23:50:24.040020Z","signature_status":"signed_v1","first_computed_at":"2026-05-23T23:50:24.040020Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Dataset Distillation","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["stat.ML"],"primary_cat":"cs.LG","authors_text":"Alexei A. Efros, Antonio Torralba, Jun-Yan Zhu, Tongzhou Wang","submitted_at":"2018-11-27T13:17:45Z","abstract_excerpt":"Model distillation aims to distill the knowledge of a complex model into a simpler one. In this paper, we consider an alternative formulation called dataset distillation: we keep the model fixed and instead attempt to distill the knowledge from a large training dataset into a small one. The idea is to synthesize a small number of data points that do not need to come from the correct data distribution, but will, when given to the learning algorithm as training data, approximate the model trained on the original data. For example, we show that it is possible to compress 60,000 MNIST training ima"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1811.10959","kind":"arxiv","version":3},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/1811.10959/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1811.10959","created_at":"2026-05-23T23:50:24.040202+00:00"},{"alias_kind":"arxiv_version","alias_value":"1811.10959v3","created_at":"2026-05-23T23:50:24.040202+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1811.10959","created_at":"2026-05-23T23:50:24.040202+00:00"},{"alias_kind":"pith_short_12","alias_value":"ORMB5QL25ELH","created_at":"2026-05-23T23:50:24.040202+00:00"},{"alias_kind":"pith_short_16","alias_value":"ORMB5QL25ELHY6N6","created_at":"2026-05-23T23:50:24.040202+00:00"},{"alias_kind":"pith_short_8","alias_value":"ORMB5QL2","created_at":"2026-05-23T23:50:24.040202+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":26,"internal_anchor_count":26,"sample":[{"citing_arxiv_id":"2310.14768","citing_title":"Policy Gradient with Kernel Quadrature","ref_index":9,"is_internal_anchor":true},{"citing_arxiv_id":"2406.10861","citing_title":"Knowledge Distillation in Federated Learning: a Survey on Long Lasting Challenges and New Solutions","ref_index":156,"is_internal_anchor":true},{"citing_arxiv_id":"2411.16312","citing_title":"EPS: Efficient Patch Sampling for Video Overfitting in Deep Super-Resolution Model Training","ref_index":29,"is_internal_anchor":true},{"citing_arxiv_id":"2605.21765","citing_title":"Position: The Time for Sampling Is Now! Charting a New Course for Bayesian Deep Learning","ref_index":11,"is_internal_anchor":true},{"citing_arxiv_id":"2605.18893","citing_title":"Position: Graph Condensation Needs a Reset -- Move Beyond Full-dataset Training and Model-Dependence","ref_index":49,"is_internal_anchor":true},{"citing_arxiv_id":"2605.00578","citing_title":"Federated Distillation for Whole Slide Image via Gaussian-Mixture Feature Alignment and Curriculum Integration","ref_index":45,"is_internal_anchor":true},{"citing_arxiv_id":"2605.18836","citing_title":"Spectral Gradient Surgery for Domain-Generalizable Dataset Distillation","ref_index":1,"is_internal_anchor":true},{"citing_arxiv_id":"2605.18893","citing_title":"Position: Graph Condensation Needs a Reset -- Move Beyond Full-dataset Training and Model-Dependence","ref_index":49,"is_internal_anchor":true},{"citing_arxiv_id":"2605.18012","citing_title":"SAS: Semantic-aware Sampling for Generative Dataset Distillation","ref_index":10,"is_internal_anchor":true},{"citing_arxiv_id":"2605.12942","citing_title":"From Compression to Accountability: Harmless Copyright Protection for Dataset Distillation","ref_index":12,"is_internal_anchor":true},{"citing_arxiv_id":"2506.01942","citing_title":"OD3: Optimization-free Dataset Distillation for Object Detection","ref_index":29,"is_internal_anchor":true},{"citing_arxiv_id":"2510.17421","citing_title":"Diffusion Models as Dataset Distillation Priors","ref_index":7,"is_internal_anchor":true},{"citing_arxiv_id":"2603.26093","citing_title":"ROAST: Risk-aware Outlier-exposure for Adversarial Selective Training of Anomaly Detectors Against Evasion Attacks","ref_index":27,"is_internal_anchor":true},{"citing_arxiv_id":"2605.00832","citing_title":"Synthetic Designed Experiments for Diagnosing Vision Model Failure","ref_index":20,"is_internal_anchor":true},{"citing_arxiv_id":"2605.12649","citing_title":"DIVER:Diving Deeper into Distilled Data via Expressive Semantic Recovery","ref_index":2,"is_internal_anchor":true},{"citing_arxiv_id":"2605.12942","citing_title":"From Compression to Accountability: Harmless Copyright Protection for Dataset Distillation","ref_index":12,"is_internal_anchor":true},{"citing_arxiv_id":"2309.12284","citing_title":"MetaMath: Bootstrap Your Own Mathematical Questions for Large Language Models","ref_index":73,"is_internal_anchor":true},{"citing_arxiv_id":"2605.08616","citing_title":"Robust Server Defense Against Unreliable Clients in One-Shot Fair Collaborative Machine Learning","ref_index":11,"is_internal_anchor":true},{"citing_arxiv_id":"2605.04569","citing_title":"Lightning Unified Video Editing via In-Context Sparse Attention","ref_index":284,"is_internal_anchor":true},{"citing_arxiv_id":"2605.00185","citing_title":"Fair Dataset Distillation via Cross-Group Barycenter Alignment","ref_index":1,"is_internal_anchor":true},{"citing_arxiv_id":"2604.12941","citing_title":"Direct Discrepancy Replay: Distribution-Discrepancy Condensation and Manifold-Consistent Replay for Continual Face Forgery Detection","ref_index":38,"is_internal_anchor":true},{"citing_arxiv_id":"2604.10666","citing_title":"Omnimodal Dataset Distillation via High-order Proxy Alignment","ref_index":11,"is_internal_anchor":true},{"citing_arxiv_id":"2605.07194","citing_title":"Closed-Form Linear-Probe Dataset Distillation for Pre-trained Vision Models","ref_index":41,"is_internal_anchor":true},{"citing_arxiv_id":"2604.07940","citing_title":"A Systematic Framework for Tabular Data Disentanglement","ref_index":29,"is_internal_anchor":true},{"citing_arxiv_id":"2604.18135","citing_title":"Soft Label Pruning and Quantization for Large-Scale Dataset Distillation","ref_index":1,"is_internal_anchor":true}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/ORMB5QL25ELHY6N64A7DXYSAK5","json":"https://pith.science/pith/ORMB5QL25ELHY6N64A7DXYSAK5.json","graph_json":"https://pith.science/api/pith-number/ORMB5QL25ELHY6N64A7DXYSAK5/graph.json","events_json":"https://pith.science/api/pith-number/ORMB5QL25ELHY6N64A7DXYSAK5/events.json","paper":"https://pith.science/paper/ORMB5QL2"},"agent_actions":{"view_html":"https://pith.science/pith/ORMB5QL25ELHY6N64A7DXYSAK5","download_json":"https://pith.science/pith/ORMB5QL25ELHY6N64A7DXYSAK5.json","view_paper":"https://pith.science/paper/ORMB5QL2","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1811.10959&json=true","fetch_graph":"https://pith.science/api/pith-number/ORMB5QL25ELHY6N64A7DXYSAK5/graph.json","fetch_events":"https://pith.science/api/pith-number/ORMB5QL25ELHY6N64A7DXYSAK5/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/ORMB5QL25ELHY6N64A7DXYSAK5/action/timestamp_anchor","attest_storage":"https://pith.science/pith/ORMB5QL25ELHY6N64A7DXYSAK5/action/storage_attestation","attest_author":"https://pith.science/pith/ORMB5QL25ELHY6N64A7DXYSAK5/action/author_attestation","sign_citation":"https://pith.science/pith/ORMB5QL25ELHY6N64A7DXYSAK5/action/citation_signature","submit_replication":"https://pith.science/pith/ORMB5QL25ELHY6N64A7DXYSAK5/action/replication_record"}},"created_at":"2026-05-23T23:50:24.040202+00:00","updated_at":"2026-05-23T23:50:24.040202+00:00"}