{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2014:4NODSCEFTVHHRZBW4GPTTPKQMR","short_pith_number":"pith:4NODSCEF","schema_version":"1.0","canonical_sha256":"e35c3908859d4e78e436e19f39bd506461d0174bd76047d779e894f3fc02e4d8","source":{"kind":"arxiv","id":"1405.3080","version":1},"attestation_state":"computed","paper":{"title":"Accelerating Minibatch Stochastic Gradient Descent using Stratified Sampling","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.LG","math.OC"],"primary_cat":"stat.ML","authors_text":"Peilin Zhao, Tong Zhang","submitted_at":"2014-05-13T09:45:49Z","abstract_excerpt":"Stochastic Gradient Descent (SGD) is a popular optimization method which has been applied to many important machine learning tasks such as Support Vector Machines and Deep Neural Networks. In order to parallelize SGD, minibatch training is often employed. The standard approach is to uniformly sample a minibatch at each step, which often leads to high variance. In this paper we propose a stratified sampling strategy, which divides the whole dataset into clusters with low within-cluster variance; we then take examples from these clusters using a stratified sampling technique. It is shown that th"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1405.3080","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"stat.ML","submitted_at":"2014-05-13T09:45:49Z","cross_cats_sorted":["cs.LG","math.OC"],"title_canon_sha256":"be9acab3d318596ed8d431707ed18e44aec1b33351b10a45bdff1e67c9516ae5","abstract_canon_sha256":"6481d35f05d1ab27134c65928d922ac7e206ac23b319799d6b15221ee26c69de"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T02:51:57.814259Z","signature_b64":"ydeOo4zn7jQYQ6jmOYT8bCpPw0zJoBohDsvKRhJsgdkgGf/t9vXg6VfbdcM7TABmoLRM88ckfajyu586mJ9oDA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"e35c3908859d4e78e436e19f39bd506461d0174bd76047d779e894f3fc02e4d8","last_reissued_at":"2026-05-18T02:51:57.813816Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T02:51:57.813816Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Accelerating Minibatch Stochastic Gradient Descent using Stratified Sampling","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.LG","math.OC"],"primary_cat":"stat.ML","authors_text":"Peilin Zhao, Tong Zhang","submitted_at":"2014-05-13T09:45:49Z","abstract_excerpt":"Stochastic Gradient Descent (SGD) is a popular optimization method which has been applied to many important machine learning tasks such as Support Vector Machines and Deep Neural Networks. In order to parallelize SGD, minibatch training is often employed. The standard approach is to uniformly sample a minibatch at each step, which often leads to high variance. In this paper we propose a stratified sampling strategy, which divides the whole dataset into clusters with low within-cluster variance; we then take examples from these clusters using a stratified sampling technique. It is shown that th"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1405.3080","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1405.3080","created_at":"2026-05-18T02:51:57.813884+00:00"},{"alias_kind":"arxiv_version","alias_value":"1405.3080v1","created_at":"2026-05-18T02:51:57.813884+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1405.3080","created_at":"2026-05-18T02:51:57.813884+00:00"},{"alias_kind":"pith_short_12","alias_value":"4NODSCEFTVHH","created_at":"2026-05-18T12:28:14.216126+00:00"},{"alias_kind":"pith_short_16","alias_value":"4NODSCEFTVHHRZBW","created_at":"2026-05-18T12:28:14.216126+00:00"},{"alias_kind":"pith_short_8","alias_value":"4NODSCEF","created_at":"2026-05-18T12:28:14.216126+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":3,"internal_anchor_count":2,"sample":[{"citing_arxiv_id":"1906.08771","citing_title":"Submodular Batch Selection for Training Deep Neural Networks","ref_index":31,"is_internal_anchor":true},{"citing_arxiv_id":"2512.05226","citing_title":"Variance Matters: Improving Domain Adaptation via Stratified Sampling","ref_index":41,"is_internal_anchor":true},{"citing_arxiv_id":"2604.06350","citing_title":"Convergence of Riemannian Stochastic Gradient Descents: Varying Batch Sizes And Nonstandard Batch Forming","ref_index":29,"is_internal_anchor":false}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/4NODSCEFTVHHRZBW4GPTTPKQMR","json":"https://pith.science/pith/4NODSCEFTVHHRZBW4GPTTPKQMR.json","graph_json":"https://pith.science/api/pith-number/4NODSCEFTVHHRZBW4GPTTPKQMR/graph.json","events_json":"https://pith.science/api/pith-number/4NODSCEFTVHHRZBW4GPTTPKQMR/events.json","paper":"https://pith.science/paper/4NODSCEF"},"agent_actions":{"view_html":"https://pith.science/pith/4NODSCEFTVHHRZBW4GPTTPKQMR","download_json":"https://pith.science/pith/4NODSCEFTVHHRZBW4GPTTPKQMR.json","view_paper":"https://pith.science/paper/4NODSCEF","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1405.3080&json=true","fetch_graph":"https://pith.science/api/pith-number/4NODSCEFTVHHRZBW4GPTTPKQMR/graph.json","fetch_events":"https://pith.science/api/pith-number/4NODSCEFTVHHRZBW4GPTTPKQMR/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/4NODSCEFTVHHRZBW4GPTTPKQMR/action/timestamp_anchor","attest_storage":"https://pith.science/pith/4NODSCEFTVHHRZBW4GPTTPKQMR/action/storage_attestation","attest_author":"https://pith.science/pith/4NODSCEFTVHHRZBW4GPTTPKQMR/action/author_attestation","sign_citation":"https://pith.science/pith/4NODSCEFTVHHRZBW4GPTTPKQMR/action/citation_signature","submit_replication":"https://pith.science/pith/4NODSCEFTVHHRZBW4GPTTPKQMR/action/replication_record"}},"created_at":"2026-05-18T02:51:57.813884+00:00","updated_at":"2026-05-18T02:51:57.813884+00:00"}