{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2025:S4D5UJMV2W4Q7XXGGZ4IRKLOS5","short_pith_number":"pith:S4D5UJMV","schema_version":"1.0","canonical_sha256":"9707da2595d5b90fdee6367888a96e97551798a17d90088c311a18b94f19f642","source":{"kind":"arxiv","id":"2504.09951","version":2},"attestation_state":"computed","paper":{"title":"Towards Weaker Variance Assumptions for Stochastic Optimization","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.LG","stat.ML"],"primary_cat":"math.OC","authors_text":"Ahmet Alacaoglu, Stephen J. Wright, Yura Malitsky","submitted_at":"2025-04-14T07:26:34Z","abstract_excerpt":"We revisit a classical assumption for analyzing stochastic gradient algorithms where the squared norm of the stochastic subgradient (or the variance for smooth problems) is allowed to grow as fast as the squared norm of the optimization variable. We contextualize this assumption in view of its inception in the 1960s, its seemingly independent appearance in the recent literature, its relationship to weakest-known variance assumptions for analyzing stochastic gradient algorithms, and its relevance in deterministic problems for non-Lipschitz nonsmooth convex optimization. We build on and extend a"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2504.09951","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"math.OC","submitted_at":"2025-04-14T07:26:34Z","cross_cats_sorted":["cs.LG","stat.ML"],"title_canon_sha256":"38ea5f1b6d42eaf2a388df2a0b94b38028ed8c09e3676cb05f9b12a49873f1bd","abstract_canon_sha256":"8798c877deb16a54d88e792e6a03f641afad1e3f2f25b4cde07348be9ecfa093"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-07-02T01:18:02.627114Z","signature_b64":"MF30+Tpln6qc6Li/AcBhwg3EZeySkevlxQ/jlv9CxJ5Q3jrIXcqqUYid4tjaZBYMrEFxtH4zmWP3G5/ySUN1Cw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"9707da2595d5b90fdee6367888a96e97551798a17d90088c311a18b94f19f642","last_reissued_at":"2026-07-02T01:18:02.626658Z","signature_status":"signed_v1","first_computed_at":"2026-07-02T01:18:02.626658Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Towards Weaker Variance Assumptions for Stochastic Optimization","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.LG","stat.ML"],"primary_cat":"math.OC","authors_text":"Ahmet Alacaoglu, Stephen J. Wright, Yura Malitsky","submitted_at":"2025-04-14T07:26:34Z","abstract_excerpt":"We revisit a classical assumption for analyzing stochastic gradient algorithms where the squared norm of the stochastic subgradient (or the variance for smooth problems) is allowed to grow as fast as the squared norm of the optimization variable. We contextualize this assumption in view of its inception in the 1960s, its seemingly independent appearance in the recent literature, its relationship to weakest-known variance assumptions for analyzing stochastic gradient algorithms, and its relevance in deterministic problems for non-Lipschitz nonsmooth convex optimization. We build on and extend a"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2504.09951","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2504.09951/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2504.09951","created_at":"2026-07-02T01:18:02.626719+00:00"},{"alias_kind":"arxiv_version","alias_value":"2504.09951v2","created_at":"2026-07-02T01:18:02.626719+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2504.09951","created_at":"2026-07-02T01:18:02.626719+00:00"},{"alias_kind":"pith_short_12","alias_value":"S4D5UJMV2W4Q","created_at":"2026-07-02T01:18:02.626719+00:00"},{"alias_kind":"pith_short_16","alias_value":"S4D5UJMV2W4Q7XXG","created_at":"2026-07-02T01:18:02.626719+00:00"},{"alias_kind":"pith_short_8","alias_value":"S4D5UJMV","created_at":"2026-07-02T01:18:02.626719+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":4,"internal_anchor_count":4,"sample":[{"citing_arxiv_id":"2606.10110","citing_title":"The Dual Averaging Power-Prox Method with Application to Heavy-Tail Incremental Gradient","ref_index":1,"is_internal_anchor":true},{"citing_arxiv_id":"2605.15388","citing_title":"Unified High-Probability Analysis of Stochastic Variance-Reduced Estimation","ref_index":2,"is_internal_anchor":true},{"citing_arxiv_id":"2605.15314","citing_title":"Beyond Bounded Variance: Variance-Reduced Normalized Methods for Nonconvex Optimization under Blum-Gladyshev Noise","ref_index":1,"is_internal_anchor":true},{"citing_arxiv_id":"2605.07531","citing_title":"SGD for Variational Inference: Tackling Unbounded Variance via Preconditioning and Dynamic Batching","ref_index":1,"is_internal_anchor":true}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/S4D5UJMV2W4Q7XXGGZ4IRKLOS5","json":"https://pith.science/pith/S4D5UJMV2W4Q7XXGGZ4IRKLOS5.json","graph_json":"https://pith.science/api/pith-number/S4D5UJMV2W4Q7XXGGZ4IRKLOS5/graph.json","events_json":"https://pith.science/api/pith-number/S4D5UJMV2W4Q7XXGGZ4IRKLOS5/events.json","paper":"https://pith.science/paper/S4D5UJMV"},"agent_actions":{"view_html":"https://pith.science/pith/S4D5UJMV2W4Q7XXGGZ4IRKLOS5","download_json":"https://pith.science/pith/S4D5UJMV2W4Q7XXGGZ4IRKLOS5.json","view_paper":"https://pith.science/paper/S4D5UJMV","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2504.09951&json=true","fetch_graph":"https://pith.science/api/pith-number/S4D5UJMV2W4Q7XXGGZ4IRKLOS5/graph.json","fetch_events":"https://pith.science/api/pith-number/S4D5UJMV2W4Q7XXGGZ4IRKLOS5/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/S4D5UJMV2W4Q7XXGGZ4IRKLOS5/action/timestamp_anchor","attest_storage":"https://pith.science/pith/S4D5UJMV2W4Q7XXGGZ4IRKLOS5/action/storage_attestation","attest_author":"https://pith.science/pith/S4D5UJMV2W4Q7XXGGZ4IRKLOS5/action/author_attestation","sign_citation":"https://pith.science/pith/S4D5UJMV2W4Q7XXGGZ4IRKLOS5/action/citation_signature","submit_replication":"https://pith.science/pith/S4D5UJMV2W4Q7XXGGZ4IRKLOS5/action/replication_record"}},"created_at":"2026-07-02T01:18:02.626719+00:00","updated_at":"2026-07-02T01:18:02.626719+00:00"}