{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2018:DG3FWFSFHEUI2DFWOJQYGVXLOO","short_pith_number":"pith:DG3FWFSF","schema_version":"1.0","canonical_sha256":"19b65b164539288d0cb672618356eb73b3860180df9b691e514edb868b60cd05","source":{"kind":"arxiv","id":"1803.05591","version":2},"attestation_state":"computed","paper":{"title":"On the insufficiency of existing momentum schemes for Stochastic Optimization","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["math.OC","stat.ML"],"primary_cat":"cs.LG","authors_text":"Praneeth Netrapalli, Prateek Jain, Rahul Kidambi, Sham M. Kakade","submitted_at":"2018-03-15T05:09:51Z","abstract_excerpt":"Momentum based stochastic gradient methods such as heavy ball (HB) and Nesterov's accelerated gradient descent (NAG) method are widely used in practice for training deep networks and other supervised learning models, as they often provide significant improvements over stochastic gradient descent (SGD). Rigorously speaking, \"fast gradient\" methods have provable improvements over gradient descent only for the deterministic case, where the gradients are exact. In the stochastic case, the popular explanations for their wide applicability is that when these fast gradient methods are applied in the "},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1803.05591","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-03-15T05:09:51Z","cross_cats_sorted":["math.OC","stat.ML"],"title_canon_sha256":"6a002061431c3241a87df5969afc1039ed11b1aa1bf333b88f152043349be845","abstract_canon_sha256":"0c04694eb8155707b31f15dd74fc8d37896ae082bc927c4423a1434038b63668"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:09:09.505111Z","signature_b64":"QQ2dTsfatGgX/D87lJRTxsZEWlgQJt156aYm3R85r1RvCMuJalFriURQHuvgfcu9iiLqqIjF4K3bq66ToW6nAw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"19b65b164539288d0cb672618356eb73b3860180df9b691e514edb868b60cd05","last_reissued_at":"2026-05-18T00:09:09.504432Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:09:09.504432Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"On the insufficiency of existing momentum schemes for Stochastic Optimization","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["math.OC","stat.ML"],"primary_cat":"cs.LG","authors_text":"Praneeth Netrapalli, Prateek Jain, Rahul Kidambi, Sham M. Kakade","submitted_at":"2018-03-15T05:09:51Z","abstract_excerpt":"Momentum based stochastic gradient methods such as heavy ball (HB) and Nesterov's accelerated gradient descent (NAG) method are widely used in practice for training deep networks and other supervised learning models, as they often provide significant improvements over stochastic gradient descent (SGD). Rigorously speaking, \"fast gradient\" methods have provable improvements over gradient descent only for the deterministic case, where the gradients are exact. In the stochastic case, the popular explanations for their wide applicability is that when these fast gradient methods are applied in the "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1803.05591","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1803.05591","created_at":"2026-05-18T00:09:09.504547+00:00"},{"alias_kind":"arxiv_version","alias_value":"1803.05591v2","created_at":"2026-05-18T00:09:09.504547+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1803.05591","created_at":"2026-05-18T00:09:09.504547+00:00"},{"alias_kind":"pith_short_12","alias_value":"DG3FWFSFHEUI","created_at":"2026-05-18T12:32:19.392346+00:00"},{"alias_kind":"pith_short_16","alias_value":"DG3FWFSFHEUI2DFW","created_at":"2026-05-18T12:32:19.392346+00:00"},{"alias_kind":"pith_short_8","alias_value":"DG3FWFSF","created_at":"2026-05-18T12:32:19.392346+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":2,"internal_anchor_count":1,"sample":[{"citing_arxiv_id":"2603.09355","citing_title":"SHANG++: Robust Stochastic Acceleration under Multiplicative Noise","ref_index":12,"is_internal_anchor":true},{"citing_arxiv_id":"2604.14108","citing_title":"Momentum Further Constrains Sharpness at the Edge of Stochastic Stability","ref_index":20,"is_internal_anchor":false}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/DG3FWFSFHEUI2DFWOJQYGVXLOO","json":"https://pith.science/pith/DG3FWFSFHEUI2DFWOJQYGVXLOO.json","graph_json":"https://pith.science/api/pith-number/DG3FWFSFHEUI2DFWOJQYGVXLOO/graph.json","events_json":"https://pith.science/api/pith-number/DG3FWFSFHEUI2DFWOJQYGVXLOO/events.json","paper":"https://pith.science/paper/DG3FWFSF"},"agent_actions":{"view_html":"https://pith.science/pith/DG3FWFSFHEUI2DFWOJQYGVXLOO","download_json":"https://pith.science/pith/DG3FWFSFHEUI2DFWOJQYGVXLOO.json","view_paper":"https://pith.science/paper/DG3FWFSF","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1803.05591&json=true","fetch_graph":"https://pith.science/api/pith-number/DG3FWFSFHEUI2DFWOJQYGVXLOO/graph.json","fetch_events":"https://pith.science/api/pith-number/DG3FWFSFHEUI2DFWOJQYGVXLOO/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/DG3FWFSFHEUI2DFWOJQYGVXLOO/action/timestamp_anchor","attest_storage":"https://pith.science/pith/DG3FWFSFHEUI2DFWOJQYGVXLOO/action/storage_attestation","attest_author":"https://pith.science/pith/DG3FWFSFHEUI2DFWOJQYGVXLOO/action/author_attestation","sign_citation":"https://pith.science/pith/DG3FWFSFHEUI2DFWOJQYGVXLOO/action/citation_signature","submit_replication":"https://pith.science/pith/DG3FWFSFHEUI2DFWOJQYGVXLOO/action/replication_record"}},"created_at":"2026-05-18T00:09:09.504547+00:00","updated_at":"2026-05-18T00:09:09.504547+00:00"}