{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2019:T7HEGN6BTWMERPLDPJJ6AEJ6HM","short_pith_number":"pith:T7HEGN6B","schema_version":"1.0","canonical_sha256":"9fce4337c19d9848bd637a53e0113e3b18afa69ae1b43ac9c8a1ff6ee123dc13","source":{"kind":"arxiv","id":"1903.00760","version":2},"attestation_state":"computed","paper":{"title":"Time-Delay Momentum: A Regularization Perspective on the Convergence and Generalization of Stochastic Momentum for Deep Learning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.CV","stat.ML"],"primary_cat":"cs.LG","authors_text":"Alan Sullivan, Wenju Xu, Ziming Zhang","submitted_at":"2019-03-02T20:21:38Z","abstract_excerpt":"In this paper we study the problem of convergence and generalization error bound of stochastic momentum for deep learning from the perspective of regularization. To do so, we first interpret momentum as solving an $\\ell_2$-regularized minimization problem to learn the offsets between arbitrary two successive model parameters. We call this {\\em time-delay momentum} because the model parameter is updated after a few iterations towards finding the minimizer. We then propose our learning algorithm, \\ie stochastic gradient descent (SGD) with time-delay momentum. We show that our algorithm can be in"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1903.00760","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-03-02T20:21:38Z","cross_cats_sorted":["cs.CV","stat.ML"],"title_canon_sha256":"da6e605b1e18cbd43cc4322bc356736283990db6258ed77f782193cb076daa17","abstract_canon_sha256":"da627e77eb2a2e206d199ddd05196de16420b31125f0210633864ade59d01392"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:44:29.177883Z","signature_b64":"R/5+6R9UjzrpOjZyyFIrk32g8dUWqCHycxIftffhBxy1Y3jPoNgXz05K5qmgKzO2KxJ7g5Xt7J2AOi8gfPkrCg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"9fce4337c19d9848bd637a53e0113e3b18afa69ae1b43ac9c8a1ff6ee123dc13","last_reissued_at":"2026-05-17T23:44:29.177376Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:44:29.177376Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Time-Delay Momentum: A Regularization Perspective on the Convergence and Generalization of Stochastic Momentum for Deep Learning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.CV","stat.ML"],"primary_cat":"cs.LG","authors_text":"Alan Sullivan, Wenju Xu, Ziming Zhang","submitted_at":"2019-03-02T20:21:38Z","abstract_excerpt":"In this paper we study the problem of convergence and generalization error bound of stochastic momentum for deep learning from the perspective of regularization. To do so, we first interpret momentum as solving an $\\ell_2$-regularized minimization problem to learn the offsets between arbitrary two successive model parameters. We call this {\\em time-delay momentum} because the model parameter is updated after a few iterations towards finding the minimizer. We then propose our learning algorithm, \\ie stochastic gradient descent (SGD) with time-delay momentum. We show that our algorithm can be in"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1903.00760","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1903.00760","created_at":"2026-05-17T23:44:29.177464+00:00"},{"alias_kind":"arxiv_version","alias_value":"1903.00760v2","created_at":"2026-05-17T23:44:29.177464+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1903.00760","created_at":"2026-05-17T23:44:29.177464+00:00"},{"alias_kind":"pith_short_12","alias_value":"T7HEGN6BTWME","created_at":"2026-05-18T12:33:27.125529+00:00"},{"alias_kind":"pith_short_16","alias_value":"T7HEGN6BTWMERPLD","created_at":"2026-05-18T12:33:27.125529+00:00"},{"alias_kind":"pith_short_8","alias_value":"T7HEGN6B","created_at":"2026-05-18T12:33:27.125529+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/T7HEGN6BTWMERPLDPJJ6AEJ6HM","json":"https://pith.science/pith/T7HEGN6BTWMERPLDPJJ6AEJ6HM.json","graph_json":"https://pith.science/api/pith-number/T7HEGN6BTWMERPLDPJJ6AEJ6HM/graph.json","events_json":"https://pith.science/api/pith-number/T7HEGN6BTWMERPLDPJJ6AEJ6HM/events.json","paper":"https://pith.science/paper/T7HEGN6B"},"agent_actions":{"view_html":"https://pith.science/pith/T7HEGN6BTWMERPLDPJJ6AEJ6HM","download_json":"https://pith.science/pith/T7HEGN6BTWMERPLDPJJ6AEJ6HM.json","view_paper":"https://pith.science/paper/T7HEGN6B","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1903.00760&json=true","fetch_graph":"https://pith.science/api/pith-number/T7HEGN6BTWMERPLDPJJ6AEJ6HM/graph.json","fetch_events":"https://pith.science/api/pith-number/T7HEGN6BTWMERPLDPJJ6AEJ6HM/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/T7HEGN6BTWMERPLDPJJ6AEJ6HM/action/timestamp_anchor","attest_storage":"https://pith.science/pith/T7HEGN6BTWMERPLDPJJ6AEJ6HM/action/storage_attestation","attest_author":"https://pith.science/pith/T7HEGN6BTWMERPLDPJJ6AEJ6HM/action/author_attestation","sign_citation":"https://pith.science/pith/T7HEGN6BTWMERPLDPJJ6AEJ6HM/action/citation_signature","submit_replication":"https://pith.science/pith/T7HEGN6BTWMERPLDPJJ6AEJ6HM/action/replication_record"}},"created_at":"2026-05-17T23:44:29.177464+00:00","updated_at":"2026-05-17T23:44:29.177464+00:00"}