{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2015:GOCZ47QHFOX4PAYW2QESOVXMFK","short_pith_number":"pith:GOCZ47QH","schema_version":"1.0","canonical_sha256":"33859e7e072bafc78316d4092756ec2ab7ff0a3d17e683fd6020310d1b475a0c","source":{"kind":"arxiv","id":"1511.06481","version":7},"attestation_state":"computed","paper":{"title":"Variance Reduction in SGD by Distributed Importance Sampling","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.LG"],"primary_cat":"stat.ML","authors_text":"Aaron Courville, Alex Lamb, Chinnadhurai Sankar, Guillaume Alain, Yoshua Bengio","submitted_at":"2015-11-20T03:09:43Z","abstract_excerpt":"Humans are able to accelerate their learning by selecting training materials that are the most informative and at the appropriate level of difficulty. We propose a framework for distributing deep learning in which one set of workers search for the most informative examples in parallel while a single worker updates the model on examples selected by importance sampling. This leads the model to update using an unbiased estimate of the gradient which also has minimum variance when the sampling proposal is proportional to the L2-norm of the gradient. We show experimentally that this method reduces "},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1511.06481","kind":"arxiv","version":7},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"stat.ML","submitted_at":"2015-11-20T03:09:43Z","cross_cats_sorted":["cs.LG"],"title_canon_sha256":"85463df761c71df42a3020b858196ac55ce14d386371a888fae435f04946807c","abstract_canon_sha256":"475760c776f586207e038c53471f15fd340d18188419fc7cad24cc90ead84564"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T01:16:59.981808Z","signature_b64":"oAa1fw8O4BHMKqxECRukyRbj9Lkd9k4Ofolm74Yfvc3o1K5OksY7ZcVY87ZE9dUmWV8VBkVPVIm7/ErHdD9hCQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"33859e7e072bafc78316d4092756ec2ab7ff0a3d17e683fd6020310d1b475a0c","last_reissued_at":"2026-05-18T01:16:59.981181Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T01:16:59.981181Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Variance Reduction in SGD by Distributed Importance Sampling","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.LG"],"primary_cat":"stat.ML","authors_text":"Aaron Courville, Alex Lamb, Chinnadhurai Sankar, Guillaume Alain, Yoshua Bengio","submitted_at":"2015-11-20T03:09:43Z","abstract_excerpt":"Humans are able to accelerate their learning by selecting training materials that are the most informative and at the appropriate level of difficulty. We propose a framework for distributing deep learning in which one set of workers search for the most informative examples in parallel while a single worker updates the model on examples selected by importance sampling. This leads the model to update using an unbiased estimate of the gradient which also has minimum variance when the sampling proposal is proportional to the L2-norm of the gradient. We show experimentally that this method reduces "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1511.06481","kind":"arxiv","version":7},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1511.06481","created_at":"2026-05-18T01:16:59.981283+00:00"},{"alias_kind":"arxiv_version","alias_value":"1511.06481v7","created_at":"2026-05-18T01:16:59.981283+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1511.06481","created_at":"2026-05-18T01:16:59.981283+00:00"},{"alias_kind":"pith_short_12","alias_value":"GOCZ47QHFOX4","created_at":"2026-05-18T12:29:22.688609+00:00"},{"alias_kind":"pith_short_16","alias_value":"GOCZ47QHFOX4PAYW","created_at":"2026-05-18T12:29:22.688609+00:00"},{"alias_kind":"pith_short_8","alias_value":"GOCZ47QH","created_at":"2026-05-18T12:29:22.688609+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":5,"internal_anchor_count":1,"sample":[{"citing_arxiv_id":"1906.08771","citing_title":"Submodular Batch Selection for Training Deep Neural Networks","ref_index":1,"is_internal_anchor":true},{"citing_arxiv_id":"2604.28020","citing_title":"Cost-Aware Learning","ref_index":1,"is_internal_anchor":false},{"citing_arxiv_id":"2605.07551","citing_title":"Disagreement-Regularized Importance Sampling for Adversarial Label Corruption","ref_index":1,"is_internal_anchor":false},{"citing_arxiv_id":"2604.07397","citing_title":"Data Warmup: Complexity-Aware Curricula for Efficient Diffusion Training","ref_index":1,"is_internal_anchor":false},{"citing_arxiv_id":"2604.04681","citing_title":"Batch Loss Score for Dynamic Data Pruning","ref_index":2,"is_internal_anchor":false}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/GOCZ47QHFOX4PAYW2QESOVXMFK","json":"https://pith.science/pith/GOCZ47QHFOX4PAYW2QESOVXMFK.json","graph_json":"https://pith.science/api/pith-number/GOCZ47QHFOX4PAYW2QESOVXMFK/graph.json","events_json":"https://pith.science/api/pith-number/GOCZ47QHFOX4PAYW2QESOVXMFK/events.json","paper":"https://pith.science/paper/GOCZ47QH"},"agent_actions":{"view_html":"https://pith.science/pith/GOCZ47QHFOX4PAYW2QESOVXMFK","download_json":"https://pith.science/pith/GOCZ47QHFOX4PAYW2QESOVXMFK.json","view_paper":"https://pith.science/paper/GOCZ47QH","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1511.06481&json=true","fetch_graph":"https://pith.science/api/pith-number/GOCZ47QHFOX4PAYW2QESOVXMFK/graph.json","fetch_events":"https://pith.science/api/pith-number/GOCZ47QHFOX4PAYW2QESOVXMFK/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/GOCZ47QHFOX4PAYW2QESOVXMFK/action/timestamp_anchor","attest_storage":"https://pith.science/pith/GOCZ47QHFOX4PAYW2QESOVXMFK/action/storage_attestation","attest_author":"https://pith.science/pith/GOCZ47QHFOX4PAYW2QESOVXMFK/action/author_attestation","sign_citation":"https://pith.science/pith/GOCZ47QHFOX4PAYW2QESOVXMFK/action/citation_signature","submit_replication":"https://pith.science/pith/GOCZ47QHFOX4PAYW2QESOVXMFK/action/replication_record"}},"created_at":"2026-05-18T01:16:59.981283+00:00","updated_at":"2026-05-18T01:16:59.981283+00:00"}