{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:CZ7WYBMTADSZM5YEZ2Q4YZYF2H","short_pith_number":"pith:CZ7WYBMT","schema_version":"1.0","canonical_sha256":"167f6c059300e5967704cea1cc6705d1d6103cb8cf64de064c4a7a43905c5876","source":{"kind":"arxiv","id":"2606.03899","version":1},"attestation_state":"computed","paper":{"title":"Denoise First, Orthogonalize Later: Understanding Momentum in Muon via Spectral Filtering","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.LG","authors_text":"Han Bao, Weiyang Liu, Xianliang Li, Zihan Zhang","submitted_at":"2026-06-02T16:54:38Z","abstract_excerpt":"Muon has recently demonstrated strong empirical performance in large language model training, but the theoretical role of momentum in Muon remains unclear. Existing analyses of Muon either remove momentum to study spectral updates in isolation, or retain momentum without explaining why it improves empirical performance. Our work bridges this gap by showing momentum in Muon acts as a spectral filter. Under a structured signal-plus-perturbation gradient model, we prove that momentum suppresses perturbations while preserving the dominant signal, thereby enlarging the spectral gap between them. Th"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2606.03899","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-06-02T16:54:38Z","cross_cats_sorted":[],"title_canon_sha256":"a76e0c450fbad4ff63fab1e78a007cfcac632d3833bd9500be4bf89080023b19","abstract_canon_sha256":"4a7bb47b6b61148fc5a66fa5c6020aba41160118a535198ad468b5fa856f7de9"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-03T02:06:06.325455Z","signature_b64":"QzPPNa8Vt7SOhRqkbA1TeIUJ+Ss9OZxcXYU8UIwR7mmSVXt3bWDCUstNSQ64mC5AT0QbFkSvnNLlB/lglJGABw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"167f6c059300e5967704cea1cc6705d1d6103cb8cf64de064c4a7a43905c5876","last_reissued_at":"2026-06-03T02:06:06.325099Z","signature_status":"signed_v1","first_computed_at":"2026-06-03T02:06:06.325099Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Denoise First, Orthogonalize Later: Understanding Momentum in Muon via Spectral Filtering","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.LG","authors_text":"Han Bao, Weiyang Liu, Xianliang Li, Zihan Zhang","submitted_at":"2026-06-02T16:54:38Z","abstract_excerpt":"Muon has recently demonstrated strong empirical performance in large language model training, but the theoretical role of momentum in Muon remains unclear. Existing analyses of Muon either remove momentum to study spectral updates in isolation, or retain momentum without explaining why it improves empirical performance. Our work bridges this gap by showing momentum in Muon acts as a spectral filter. Under a structured signal-plus-perturbation gradient model, we prove that momentum suppresses perturbations while preserving the dominant signal, thereby enlarging the spectral gap between them. Th"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.03899","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2606.03899/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2606.03899","created_at":"2026-06-03T02:06:06.325159+00:00"},{"alias_kind":"arxiv_version","alias_value":"2606.03899v1","created_at":"2026-06-03T02:06:06.325159+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.03899","created_at":"2026-06-03T02:06:06.325159+00:00"},{"alias_kind":"pith_short_12","alias_value":"CZ7WYBMTADSZ","created_at":"2026-06-03T02:06:06.325159+00:00"},{"alias_kind":"pith_short_16","alias_value":"CZ7WYBMTADSZM5YE","created_at":"2026-06-03T02:06:06.325159+00:00"},{"alias_kind":"pith_short_8","alias_value":"CZ7WYBMT","created_at":"2026-06-03T02:06:06.325159+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/CZ7WYBMTADSZM5YEZ2Q4YZYF2H","json":"https://pith.science/pith/CZ7WYBMTADSZM5YEZ2Q4YZYF2H.json","graph_json":"https://pith.science/api/pith-number/CZ7WYBMTADSZM5YEZ2Q4YZYF2H/graph.json","events_json":"https://pith.science/api/pith-number/CZ7WYBMTADSZM5YEZ2Q4YZYF2H/events.json","paper":"https://pith.science/paper/CZ7WYBMT"},"agent_actions":{"view_html":"https://pith.science/pith/CZ7WYBMTADSZM5YEZ2Q4YZYF2H","download_json":"https://pith.science/pith/CZ7WYBMTADSZM5YEZ2Q4YZYF2H.json","view_paper":"https://pith.science/paper/CZ7WYBMT","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2606.03899&json=true","fetch_graph":"https://pith.science/api/pith-number/CZ7WYBMTADSZM5YEZ2Q4YZYF2H/graph.json","fetch_events":"https://pith.science/api/pith-number/CZ7WYBMTADSZM5YEZ2Q4YZYF2H/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/CZ7WYBMTADSZM5YEZ2Q4YZYF2H/action/timestamp_anchor","attest_storage":"https://pith.science/pith/CZ7WYBMTADSZM5YEZ2Q4YZYF2H/action/storage_attestation","attest_author":"https://pith.science/pith/CZ7WYBMTADSZM5YEZ2Q4YZYF2H/action/author_attestation","sign_citation":"https://pith.science/pith/CZ7WYBMTADSZM5YEZ2Q4YZYF2H/action/citation_signature","submit_replication":"https://pith.science/pith/CZ7WYBMTADSZM5YEZ2Q4YZYF2H/action/replication_record"}},"created_at":"2026-06-03T02:06:06.325159+00:00","updated_at":"2026-06-03T02:06:06.325159+00:00"}