{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:SBV655U5G6KECBSAOJLJIEXJBZ","short_pith_number":"pith:SBV655U5","schema_version":"1.0","canonical_sha256":"906beef69d379441064072569412e90e70cc97bc9e0665f927f5a3b883216881","source":{"kind":"arxiv","id":"2606.21514","version":1},"attestation_state":"computed","paper":{"title":"Towards Understanding the Power and Limits of the Muon Optimizer: A River-Valley Perspective","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.LG","authors_text":"Jianhao Ma, Jiaye Teng, Jinji Yang, Runze Shi, Tianqi Shen, Ziye Ma","submitted_at":"2026-06-19T15:10:20Z","abstract_excerpt":"Recently, Muon has gained substantial attention as an appealing alternative to Adam-like optimizers, with many works highlighting its advantages through spectral normalization and improved conditioning. Yet this positive theoretical narrative contrasts with its empirical performance in large language model (LLM) training, where Muon's gains over Adam/AdamW are often mixed, schedule-sensitive, and not uniformly superior. To address this gap, we develop a trajectory-level theory characterizing both the strengths and limitations of Muon. We introduce a mixed-spiked matrix sensing model whose sens"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2606.21514","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2026-06-19T15:10:20Z","cross_cats_sorted":[],"title_canon_sha256":"17201ac3c37d506a6e587b21a0ea8c2f8d6ba9cdc75216eea27e81e6c252b65c","abstract_canon_sha256":"a8cc8f14bd72dec7512a18d6ff67572067cee2c84405e90eb30c48f14d0d8959"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-23T01:13:13.154269Z","signature_b64":"WlPBGaVKv0C5jvTmshp7QDWDYewSPqZPfSKfPUGze4rr4oyOhWaqbXsqfpQCfSEO6xYA2RToy1jp7ZxEsYx1CA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"906beef69d379441064072569412e90e70cc97bc9e0665f927f5a3b883216881","last_reissued_at":"2026-06-23T01:13:13.153783Z","signature_status":"signed_v1","first_computed_at":"2026-06-23T01:13:13.153783Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Towards Understanding the Power and Limits of the Muon Optimizer: A River-Valley Perspective","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.LG","authors_text":"Jianhao Ma, Jiaye Teng, Jinji Yang, Runze Shi, Tianqi Shen, Ziye Ma","submitted_at":"2026-06-19T15:10:20Z","abstract_excerpt":"Recently, Muon has gained substantial attention as an appealing alternative to Adam-like optimizers, with many works highlighting its advantages through spectral normalization and improved conditioning. Yet this positive theoretical narrative contrasts with its empirical performance in large language model (LLM) training, where Muon's gains over Adam/AdamW are often mixed, schedule-sensitive, and not uniformly superior. To address this gap, we develop a trajectory-level theory characterizing both the strengths and limitations of Muon. We introduce a mixed-spiked matrix sensing model whose sens"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.21514","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2606.21514/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2606.21514","created_at":"2026-06-23T01:13:13.153856+00:00"},{"alias_kind":"arxiv_version","alias_value":"2606.21514v1","created_at":"2026-06-23T01:13:13.153856+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.21514","created_at":"2026-06-23T01:13:13.153856+00:00"},{"alias_kind":"pith_short_12","alias_value":"SBV655U5G6KE","created_at":"2026-06-23T01:13:13.153856+00:00"},{"alias_kind":"pith_short_16","alias_value":"SBV655U5G6KECBSA","created_at":"2026-06-23T01:13:13.153856+00:00"},{"alias_kind":"pith_short_8","alias_value":"SBV655U5","created_at":"2026-06-23T01:13:13.153856+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/SBV655U5G6KECBSAOJLJIEXJBZ","json":"https://pith.science/pith/SBV655U5G6KECBSAOJLJIEXJBZ.json","graph_json":"https://pith.science/api/pith-number/SBV655U5G6KECBSAOJLJIEXJBZ/graph.json","events_json":"https://pith.science/api/pith-number/SBV655U5G6KECBSAOJLJIEXJBZ/events.json","paper":"https://pith.science/paper/SBV655U5"},"agent_actions":{"view_html":"https://pith.science/pith/SBV655U5G6KECBSAOJLJIEXJBZ","download_json":"https://pith.science/pith/SBV655U5G6KECBSAOJLJIEXJBZ.json","view_paper":"https://pith.science/paper/SBV655U5","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2606.21514&json=true","fetch_graph":"https://pith.science/api/pith-number/SBV655U5G6KECBSAOJLJIEXJBZ/graph.json","fetch_events":"https://pith.science/api/pith-number/SBV655U5G6KECBSAOJLJIEXJBZ/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/SBV655U5G6KECBSAOJLJIEXJBZ/action/timestamp_anchor","attest_storage":"https://pith.science/pith/SBV655U5G6KECBSAOJLJIEXJBZ/action/storage_attestation","attest_author":"https://pith.science/pith/SBV655U5G6KECBSAOJLJIEXJBZ/action/author_attestation","sign_citation":"https://pith.science/pith/SBV655U5G6KECBSAOJLJIEXJBZ/action/citation_signature","submit_replication":"https://pith.science/pith/SBV655U5G6KECBSAOJLJIEXJBZ/action/replication_record"}},"created_at":"2026-06-23T01:13:13.153856+00:00","updated_at":"2026-06-23T01:13:13.153856+00:00"}