{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2018:3B3DJ2DW62TD5QSMKRMKTNS2W6","short_pith_number":"pith:3B3DJ2DW","schema_version":"1.0","canonical_sha256":"d87634e876f6a63ec24c5458a9b65ab79340785df9fe4f8e5050fb297030ee32","source":{"kind":"arxiv","id":"1802.05668","version":1},"attestation_state":"computed","paper":{"title":"Model compression via distillation and quantization","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.LG"],"primary_cat":"cs.NE","authors_text":"Antonio Polino, Dan Alistarh, Razvan Pascanu","submitted_at":"2018-02-15T17:18:49Z","abstract_excerpt":"Deep neural networks (DNNs) continue to make significant advances, solving tasks from image classification to translation or reinforcement learning. One aspect of the field receiving considerable attention is efficiently executing deep models in resource-constrained environments, such as mobile or embedded devices. This paper focuses on this problem, and proposes two new compression methods, which jointly leverage weight quantization and distillation of larger teacher networks into smaller student networks. The first method we propose is called quantized distillation and leverages distillation"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1802.05668","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.NE","submitted_at":"2018-02-15T17:18:49Z","cross_cats_sorted":["cs.LG"],"title_canon_sha256":"b1e73438645565f83aa7d02bf0d917a6cc7335cf7764cbb2c1c346292c27f23d","abstract_canon_sha256":"49518e5ddad4011166556242596f95a74535fc4fe59e752b79e6bb8d4ce6e1a7"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:23:14.949874Z","signature_b64":"It1ovcVg0QZoPJAUCvK9JNSSyNCUcHWD8eyVst4J3fuMbxrZDJi9baJv64onN/rhlLkCV6DTQmhq0onbAwx/CQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"d87634e876f6a63ec24c5458a9b65ab79340785df9fe4f8e5050fb297030ee32","last_reissued_at":"2026-05-18T00:23:14.949232Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:23:14.949232Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Model compression via distillation and quantization","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.LG"],"primary_cat":"cs.NE","authors_text":"Antonio Polino, Dan Alistarh, Razvan Pascanu","submitted_at":"2018-02-15T17:18:49Z","abstract_excerpt":"Deep neural networks (DNNs) continue to make significant advances, solving tasks from image classification to translation or reinforcement learning. One aspect of the field receiving considerable attention is efficiently executing deep models in resource-constrained environments, such as mobile or embedded devices. This paper focuses on this problem, and proposes two new compression methods, which jointly leverage weight quantization and distillation of larger teacher networks into smaller student networks. The first method we propose is called quantized distillation and leverages distillation"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1802.05668","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1802.05668","created_at":"2026-05-18T00:23:14.949331+00:00"},{"alias_kind":"arxiv_version","alias_value":"1802.05668v1","created_at":"2026-05-18T00:23:14.949331+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1802.05668","created_at":"2026-05-18T00:23:14.949331+00:00"},{"alias_kind":"pith_short_12","alias_value":"3B3DJ2DW62TD","created_at":"2026-05-18T12:32:02.567920+00:00"},{"alias_kind":"pith_short_16","alias_value":"3B3DJ2DW62TD5QSM","created_at":"2026-05-18T12:32:02.567920+00:00"},{"alias_kind":"pith_short_8","alias_value":"3B3DJ2DW","created_at":"2026-05-18T12:32:02.567920+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":7,"internal_anchor_count":6,"sample":[{"citing_arxiv_id":"2112.11447","citing_title":"Multi-Modality Distillation via Learning the teacher's modality-level Gram Matrix","ref_index":19,"is_internal_anchor":true},{"citing_arxiv_id":"2412.15689","citing_title":"DOLLAR: Few-Step Video Generation via Distillation and Latent Reward Optimization","ref_index":38,"is_internal_anchor":true},{"citing_arxiv_id":"2505.06907","citing_title":"A Survey on Foundation Models for Personalized Federated Intelligence","ref_index":198,"is_internal_anchor":true},{"citing_arxiv_id":"2509.11419","citing_title":"Knowledge Distillation for Sensing-Assisted Long-Term Beam Tracking in mmWave Communications","ref_index":47,"is_internal_anchor":true},{"citing_arxiv_id":"2605.19842","citing_title":"Fast Tensorization of Neural Networks via Slice-wise Feature Distillation","ref_index":21,"is_internal_anchor":true},{"citing_arxiv_id":"2506.22726","citing_title":"XTransfer: Modality-Agnostic Few-Shot Model Transfer for Human Sensing at the Edge","ref_index":49,"is_internal_anchor":true},{"citing_arxiv_id":"2605.11189","citing_title":"Deep Learning for Protein Complex Prediction and Design","ref_index":28,"is_internal_anchor":false}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/3B3DJ2DW62TD5QSMKRMKTNS2W6","json":"https://pith.science/pith/3B3DJ2DW62TD5QSMKRMKTNS2W6.json","graph_json":"https://pith.science/api/pith-number/3B3DJ2DW62TD5QSMKRMKTNS2W6/graph.json","events_json":"https://pith.science/api/pith-number/3B3DJ2DW62TD5QSMKRMKTNS2W6/events.json","paper":"https://pith.science/paper/3B3DJ2DW"},"agent_actions":{"view_html":"https://pith.science/pith/3B3DJ2DW62TD5QSMKRMKTNS2W6","download_json":"https://pith.science/pith/3B3DJ2DW62TD5QSMKRMKTNS2W6.json","view_paper":"https://pith.science/paper/3B3DJ2DW","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1802.05668&json=true","fetch_graph":"https://pith.science/api/pith-number/3B3DJ2DW62TD5QSMKRMKTNS2W6/graph.json","fetch_events":"https://pith.science/api/pith-number/3B3DJ2DW62TD5QSMKRMKTNS2W6/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/3B3DJ2DW62TD5QSMKRMKTNS2W6/action/timestamp_anchor","attest_storage":"https://pith.science/pith/3B3DJ2DW62TD5QSMKRMKTNS2W6/action/storage_attestation","attest_author":"https://pith.science/pith/3B3DJ2DW62TD5QSMKRMKTNS2W6/action/author_attestation","sign_citation":"https://pith.science/pith/3B3DJ2DW62TD5QSMKRMKTNS2W6/action/citation_signature","submit_replication":"https://pith.science/pith/3B3DJ2DW62TD5QSMKRMKTNS2W6/action/replication_record"}},"created_at":"2026-05-18T00:23:14.949331+00:00","updated_at":"2026-05-18T00:23:14.949331+00:00"}