{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:EXA7RSNMKXCFSGMK7AKK654RMA","short_pith_number":"pith:EXA7RSNM","schema_version":"1.0","canonical_sha256":"25c1f8c9ac55c459198af814af7791602fcfe8d8feb89e7dc6b46f4a46c7cfe5","source":{"kind":"arxiv","id":"2605.18694","version":1},"attestation_state":"computed","paper":{"title":"Can Adaptive Gradient Methods Converge under Heavy-Tailed Noise? A Case Study of AdaGrad","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.LG","stat.ML"],"primary_cat":"math.OC","authors_text":"Zijian Liu","submitted_at":"2026-05-18T17:30:15Z","abstract_excerpt":"Many tasks in modern machine learning are observed to involve heavy-tailed gradient noise during the optimization process. To manage this realistic and challenging setting, new mechanisms, such as gradient clipping and gradient normalization, have been introduced to ensure the convergence of first-order algorithms. However, adaptive gradient methods, a famous class of modern optimizers that includes popular $\\mathtt{Adam}$ and $\\mathtt{AdamW}$, often perform well even without any extra operations mentioned above. It is therefore natural to ask whether adaptive gradient methods can converge und"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2605.18694","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"math.OC","submitted_at":"2026-05-18T17:30:15Z","cross_cats_sorted":["cs.LG","stat.ML"],"title_canon_sha256":"852e2397aad318b10a1b88e86207caf685195447248fc7e88593e68e5d6d3571","abstract_canon_sha256":"9d11a8679fb220d64409b8c6390d03e4fb52b64b002dfbe5759cd879e9188753"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-20T00:06:15.420379Z","signature_b64":"tam8w8EpiJPnwcm1wbBrdPnpj3UacHNe1ahCdKeTsPgzJZleZxzKUx+gqSaIUwRoHJIfnVksSXfY2D7t9L55BA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"25c1f8c9ac55c459198af814af7791602fcfe8d8feb89e7dc6b46f4a46c7cfe5","last_reissued_at":"2026-05-20T00:06:15.419472Z","signature_status":"signed_v1","first_computed_at":"2026-05-20T00:06:15.419472Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Can Adaptive Gradient Methods Converge under Heavy-Tailed Noise? A Case Study of AdaGrad","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.LG","stat.ML"],"primary_cat":"math.OC","authors_text":"Zijian Liu","submitted_at":"2026-05-18T17:30:15Z","abstract_excerpt":"Many tasks in modern machine learning are observed to involve heavy-tailed gradient noise during the optimization process. To manage this realistic and challenging setting, new mechanisms, such as gradient clipping and gradient normalization, have been introduced to ensure the convergence of first-order algorithms. However, adaptive gradient methods, a famous class of modern optimizers that includes popular $\\mathtt{Adam}$ and $\\mathtt{AdamW}$, often perform well even without any extra operations mentioned above. It is therefore natural to ask whether adaptive gradient methods can converge und"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.18694","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2605.18694/integrity.json","findings":[],"available":true,"detectors_run":[{"name":"claim_evidence","ran_at":"2026-05-20T00:01:59.090592Z","status":"completed","version":"1.0.0","findings_count":0}],"snapshot_sha256":"0ee12c7285d16d2c60647522e18db5ec70733384bb7c6f327af9051aaea64718"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2605.18694","created_at":"2026-05-20T00:06:15.419610+00:00"},{"alias_kind":"arxiv_version","alias_value":"2605.18694v1","created_at":"2026-05-20T00:06:15.419610+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.18694","created_at":"2026-05-20T00:06:15.419610+00:00"},{"alias_kind":"pith_short_12","alias_value":"EXA7RSNMKXCF","created_at":"2026-05-20T00:06:15.419610+00:00"},{"alias_kind":"pith_short_16","alias_value":"EXA7RSNMKXCFSGMK","created_at":"2026-05-20T00:06:15.419610+00:00"},{"alias_kind":"pith_short_8","alias_value":"EXA7RSNM","created_at":"2026-05-20T00:06:15.419610+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/EXA7RSNMKXCFSGMK7AKK654RMA","json":"https://pith.science/pith/EXA7RSNMKXCFSGMK7AKK654RMA.json","graph_json":"https://pith.science/api/pith-number/EXA7RSNMKXCFSGMK7AKK654RMA/graph.json","events_json":"https://pith.science/api/pith-number/EXA7RSNMKXCFSGMK7AKK654RMA/events.json","paper":"https://pith.science/paper/EXA7RSNM"},"agent_actions":{"view_html":"https://pith.science/pith/EXA7RSNMKXCFSGMK7AKK654RMA","download_json":"https://pith.science/pith/EXA7RSNMKXCFSGMK7AKK654RMA.json","view_paper":"https://pith.science/paper/EXA7RSNM","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2605.18694&json=true","fetch_graph":"https://pith.science/api/pith-number/EXA7RSNMKXCFSGMK7AKK654RMA/graph.json","fetch_events":"https://pith.science/api/pith-number/EXA7RSNMKXCFSGMK7AKK654RMA/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/EXA7RSNMKXCFSGMK7AKK654RMA/action/timestamp_anchor","attest_storage":"https://pith.science/pith/EXA7RSNMKXCFSGMK7AKK654RMA/action/storage_attestation","attest_author":"https://pith.science/pith/EXA7RSNMKXCFSGMK7AKK654RMA/action/author_attestation","sign_citation":"https://pith.science/pith/EXA7RSNMKXCFSGMK7AKK654RMA/action/citation_signature","submit_replication":"https://pith.science/pith/EXA7RSNMKXCFSGMK7AKK654RMA/action/replication_record"}},"created_at":"2026-05-20T00:06:15.419610+00:00","updated_at":"2026-05-20T00:06:15.419610+00:00"}