{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2018:ZXEDOWZBG65IRULH6MGZ4ZCHXV","short_pith_number":"pith:ZXEDOWZB","canonical_record":{"source":{"id":"1812.00914","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-12-03T17:15:44Z","cross_cats_sorted":["cs.AI","stat.ML"],"title_canon_sha256":"7f550687080b4d526d6843a880066929a741415f7841245fcaa5b75b13847837","abstract_canon_sha256":"c270802ba12d336b198fa360367ffcd7a777876393c43cb13ecd47ee96354cb3"},"schema_version":"1.0"},"canonical_sha256":"cdc8375b2137ba88d167f30d9e6447bd4875d8900ea5d01e75d11f498016d155","source":{"kind":"arxiv","id":"1812.00914","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1812.00914","created_at":"2026-05-17T23:59:18Z"},{"alias_kind":"arxiv_version","alias_value":"1812.00914v1","created_at":"2026-05-17T23:59:18Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1812.00914","created_at":"2026-05-17T23:59:18Z"},{"alias_kind":"pith_short_12","alias_value":"ZXEDOWZBG65I","created_at":"2026-05-18T12:33:07Z"},{"alias_kind":"pith_short_16","alias_value":"ZXEDOWZBG65IRULH","created_at":"2026-05-18T12:33:07Z"},{"alias_kind":"pith_short_8","alias_value":"ZXEDOWZB","created_at":"2026-05-18T12:33:07Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2018:ZXEDOWZBG65IRULH6MGZ4ZCHXV","target":"record","payload":{"canonical_record":{"source":{"id":"1812.00914","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-12-03T17:15:44Z","cross_cats_sorted":["cs.AI","stat.ML"],"title_canon_sha256":"7f550687080b4d526d6843a880066929a741415f7841245fcaa5b75b13847837","abstract_canon_sha256":"c270802ba12d336b198fa360367ffcd7a777876393c43cb13ecd47ee96354cb3"},"schema_version":"1.0"},"canonical_sha256":"cdc8375b2137ba88d167f30d9e6447bd4875d8900ea5d01e75d11f498016d155","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:59:18.604353Z","signature_b64":"Angj1mqTgqJfKgmDgyw2Gh/9P2f/DON/vTUwaXBmyNeKcihTrA8NVZIFbspWSBUj+lZNZqunhORotQgBUa9nBw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"cdc8375b2137ba88d167f30d9e6447bd4875d8900ea5d01e75d11f498016d155","last_reissued_at":"2026-05-17T23:59:18.603916Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:59:18.603916Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1812.00914","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:59:18Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"MYcCCSEQ1C+uYyBTs3MBYvnUgHf3e7MHZPnkp89rJ4oP/pvfur5+jOuKoac7h5uZ6plPJ9MGjJw3PG23U8mLCg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-01T06:02:57.484925Z"},"content_sha256":"ab3d3d4c3b2a443bf58ff49f8c3d3019224c151dac47fd11f09978234d92be67","schema_version":"1.0","event_id":"sha256:ab3d3d4c3b2a443bf58ff49f8c3d3019224c151dac47fd11f09978234d92be67"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2018:ZXEDOWZBG65IRULH6MGZ4ZCHXV","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Accelerating Large Scale Knowledge Distillation via Dynamic Importance Sampling","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","stat.ML"],"primary_cat":"cs.LG","authors_text":"Martha White, Minghan Li, Ruicheng Li, Tanli Zuo, Weishi Zheng","submitted_at":"2018-12-03T17:15:44Z","abstract_excerpt":"Knowledge distillation is an effective technique that transfers knowledge from a large teacher model to a shallow student. However, just like massive classification, large scale knowledge distillation also imposes heavy computational costs on training models of deep neural networks, as the softmax activations at the last layer involve computing probabilities over numerous classes. In this work, we apply the idea of importance sampling which is often used in Neural Machine Translation on large scale knowledge distillation. We present a method called dynamic importance sampling, where ranked cla"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1812.00914","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:59:18Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"pNuoJuY5k5ZUD76kG1PTbnHPl+D2JsucCJGbyv8yuJH+XhdbQDerNJUvyhAzwdLweVBiI3bUDOQhR5qQawiDDg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-01T06:02:57.485276Z"},"content_sha256":"f271fef27766ad0f31410290e1b67327bbffd6cdf2a95ca36bf99d1bffcd5e80","schema_version":"1.0","event_id":"sha256:f271fef27766ad0f31410290e1b67327bbffd6cdf2a95ca36bf99d1bffcd5e80"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/ZXEDOWZBG65IRULH6MGZ4ZCHXV/bundle.json","state_url":"https://pith.science/pith/ZXEDOWZBG65IRULH6MGZ4ZCHXV/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/ZXEDOWZBG65IRULH6MGZ4ZCHXV/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-01T06:02:57Z","links":{"resolver":"https://pith.science/pith/ZXEDOWZBG65IRULH6MGZ4ZCHXV","bundle":"https://pith.science/pith/ZXEDOWZBG65IRULH6MGZ4ZCHXV/bundle.json","state":"https://pith.science/pith/ZXEDOWZBG65IRULH6MGZ4ZCHXV/state.json","well_known_bundle":"https://pith.science/.well-known/pith/ZXEDOWZBG65IRULH6MGZ4ZCHXV/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2018:ZXEDOWZBG65IRULH6MGZ4ZCHXV","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"c270802ba12d336b198fa360367ffcd7a777876393c43cb13ecd47ee96354cb3","cross_cats_sorted":["cs.AI","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-12-03T17:15:44Z","title_canon_sha256":"7f550687080b4d526d6843a880066929a741415f7841245fcaa5b75b13847837"},"schema_version":"1.0","source":{"id":"1812.00914","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1812.00914","created_at":"2026-05-17T23:59:18Z"},{"alias_kind":"arxiv_version","alias_value":"1812.00914v1","created_at":"2026-05-17T23:59:18Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1812.00914","created_at":"2026-05-17T23:59:18Z"},{"alias_kind":"pith_short_12","alias_value":"ZXEDOWZBG65I","created_at":"2026-05-18T12:33:07Z"},{"alias_kind":"pith_short_16","alias_value":"ZXEDOWZBG65IRULH","created_at":"2026-05-18T12:33:07Z"},{"alias_kind":"pith_short_8","alias_value":"ZXEDOWZB","created_at":"2026-05-18T12:33:07Z"}],"graph_snapshots":[{"event_id":"sha256:f271fef27766ad0f31410290e1b67327bbffd6cdf2a95ca36bf99d1bffcd5e80","target":"graph","created_at":"2026-05-17T23:59:18Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Knowledge distillation is an effective technique that transfers knowledge from a large teacher model to a shallow student. However, just like massive classification, large scale knowledge distillation also imposes heavy computational costs on training models of deep neural networks, as the softmax activations at the last layer involve computing probabilities over numerous classes. In this work, we apply the idea of importance sampling which is often used in Neural Machine Translation on large scale knowledge distillation. We present a method called dynamic importance sampling, where ranked cla","authors_text":"Martha White, Minghan Li, Ruicheng Li, Tanli Zuo, Weishi Zheng","cross_cats":["cs.AI","stat.ML"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-12-03T17:15:44Z","title":"Accelerating Large Scale Knowledge Distillation via Dynamic Importance Sampling"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1812.00914","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:ab3d3d4c3b2a443bf58ff49f8c3d3019224c151dac47fd11f09978234d92be67","target":"record","created_at":"2026-05-17T23:59:18Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"c270802ba12d336b198fa360367ffcd7a777876393c43cb13ecd47ee96354cb3","cross_cats_sorted":["cs.AI","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-12-03T17:15:44Z","title_canon_sha256":"7f550687080b4d526d6843a880066929a741415f7841245fcaa5b75b13847837"},"schema_version":"1.0","source":{"id":"1812.00914","kind":"arxiv","version":1}},"canonical_sha256":"cdc8375b2137ba88d167f30d9e6447bd4875d8900ea5d01e75d11f498016d155","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"cdc8375b2137ba88d167f30d9e6447bd4875d8900ea5d01e75d11f498016d155","first_computed_at":"2026-05-17T23:59:18.603916Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:59:18.603916Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"Angj1mqTgqJfKgmDgyw2Gh/9P2f/DON/vTUwaXBmyNeKcihTrA8NVZIFbspWSBUj+lZNZqunhORotQgBUa9nBw==","signature_status":"signed_v1","signed_at":"2026-05-17T23:59:18.604353Z","signed_message":"canonical_sha256_bytes"},"source_id":"1812.00914","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:ab3d3d4c3b2a443bf58ff49f8c3d3019224c151dac47fd11f09978234d92be67","sha256:f271fef27766ad0f31410290e1b67327bbffd6cdf2a95ca36bf99d1bffcd5e80"],"state_sha256":"705cf3f4d251065c4caef273a895a8d80f2977876dfaf71e70b078b0c1cf7bdd"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"TSK2zigacPCJmUr0bB0V+0ST9lHXeLM/KmPdu2Rc4zvn1fDWooTWNO+nvuLB6GWsJq9ashg7P5A8WUxYXs7MBg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-01T06:02:57.487435Z","bundle_sha256":"77bad8e531a15eee0295b491fde25caea22f8bfd67361b77ae6bb391bea28dbf"}}