{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2017:5JG5WJ4JQQKAMI57IEDKUVZQJ6","short_pith_number":"pith:5JG5WJ4J","schema_version":"1.0","canonical_sha256":"ea4ddb278984140623bf4106aa57304fb0e0900929e8e9c496b5bd518184f6c2","source":{"kind":"arxiv","id":"1706.00043","version":2},"attestation_state":"computed","paper":{"title":"Biased Importance Sampling for Deep Neural Network Training","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.LG","authors_text":"Angelos Katharopoulos, Fran\\c{c}ois Fleuret","submitted_at":"2017-05-31T18:25:09Z","abstract_excerpt":"Importance sampling has been successfully used to accelerate stochastic optimization in many convex problems. However, the lack of an efficient way to calculate the importance still hinders its application to Deep Learning.\n  In this paper, we show that the loss value can be used as an alternative importance metric, and propose a way to efficiently approximate it for a deep model, using a small model trained for that purpose in parallel.\n  This method allows in particular to utilize a biased gradient estimate that implicitly optimizes a soft max-loss, and leads to better generalization perform"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1706.00043","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2017-05-31T18:25:09Z","cross_cats_sorted":[],"title_canon_sha256":"7e245ca9034d9fe4c2a02ff673142a8720139aa75af7f007da04d3f4efcda8c1","abstract_canon_sha256":"d4ab5a2fab99bffa7753b685a113f1c34fa4a8814c3090c282c307327a140ef7"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:35:16.151046Z","signature_b64":"0gWzMHqM7o0ro7wnBC2UDK1FfZ/nFa/Yx2tBVxmYkEs/JPPho1YkCXqd4o37iHvXZEEM2cbYDU20hsqsUsO2Cg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"ea4ddb278984140623bf4106aa57304fb0e0900929e8e9c496b5bd518184f6c2","last_reissued_at":"2026-05-18T00:35:16.150543Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:35:16.150543Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Biased Importance Sampling for Deep Neural Network Training","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.LG","authors_text":"Angelos Katharopoulos, Fran\\c{c}ois Fleuret","submitted_at":"2017-05-31T18:25:09Z","abstract_excerpt":"Importance sampling has been successfully used to accelerate stochastic optimization in many convex problems. However, the lack of an efficient way to calculate the importance still hinders its application to Deep Learning.\n  In this paper, we show that the loss value can be used as an alternative importance metric, and propose a way to efficiently approximate it for a deep model, using a small model trained for that purpose in parallel.\n  This method allows in particular to utilize a biased gradient estimate that implicitly optimizes a soft max-loss, and leads to better generalization perform"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1706.00043","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1706.00043","created_at":"2026-05-18T00:35:16.150626+00:00"},{"alias_kind":"arxiv_version","alias_value":"1706.00043v2","created_at":"2026-05-18T00:35:16.150626+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1706.00043","created_at":"2026-05-18T00:35:16.150626+00:00"},{"alias_kind":"pith_short_12","alias_value":"5JG5WJ4JQQKA","created_at":"2026-05-18T12:31:00.734936+00:00"},{"alias_kind":"pith_short_16","alias_value":"5JG5WJ4JQQKAMI57","created_at":"2026-05-18T12:31:00.734936+00:00"},{"alias_kind":"pith_short_8","alias_value":"5JG5WJ4J","created_at":"2026-05-18T12:31:00.734936+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":1,"internal_anchor_count":1,"sample":[{"citing_arxiv_id":"1906.08771","citing_title":"Submodular Batch Selection for Training Deep Neural Networks","ref_index":10,"is_internal_anchor":true}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/5JG5WJ4JQQKAMI57IEDKUVZQJ6","json":"https://pith.science/pith/5JG5WJ4JQQKAMI57IEDKUVZQJ6.json","graph_json":"https://pith.science/api/pith-number/5JG5WJ4JQQKAMI57IEDKUVZQJ6/graph.json","events_json":"https://pith.science/api/pith-number/5JG5WJ4JQQKAMI57IEDKUVZQJ6/events.json","paper":"https://pith.science/paper/5JG5WJ4J"},"agent_actions":{"view_html":"https://pith.science/pith/5JG5WJ4JQQKAMI57IEDKUVZQJ6","download_json":"https://pith.science/pith/5JG5WJ4JQQKAMI57IEDKUVZQJ6.json","view_paper":"https://pith.science/paper/5JG5WJ4J","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1706.00043&json=true","fetch_graph":"https://pith.science/api/pith-number/5JG5WJ4JQQKAMI57IEDKUVZQJ6/graph.json","fetch_events":"https://pith.science/api/pith-number/5JG5WJ4JQQKAMI57IEDKUVZQJ6/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/5JG5WJ4JQQKAMI57IEDKUVZQJ6/action/timestamp_anchor","attest_storage":"https://pith.science/pith/5JG5WJ4JQQKAMI57IEDKUVZQJ6/action/storage_attestation","attest_author":"https://pith.science/pith/5JG5WJ4JQQKAMI57IEDKUVZQJ6/action/author_attestation","sign_citation":"https://pith.science/pith/5JG5WJ4JQQKAMI57IEDKUVZQJ6/action/citation_signature","submit_replication":"https://pith.science/pith/5JG5WJ4JQQKAMI57IEDKUVZQJ6/action/replication_record"}},"created_at":"2026-05-18T00:35:16.150626+00:00","updated_at":"2026-05-18T00:35:16.150626+00:00"}