{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:NYJPOMCYKXAAR7PV3SNICJOQ5Z","short_pith_number":"pith:NYJPOMCY","schema_version":"1.0","canonical_sha256":"6e12f7305855c008fdf5dc9a8125d0ee6876b6f58555da2e1fd58b34620e6090","source":{"kind":"arxiv","id":"2605.19619","version":1},"attestation_state":"computed","paper":{"title":"MiMuon: Mixed Muon Optimizer with Improved Generalization for Large Models","license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","headline":"","cross_cats":["cs.AI","math.OC","stat.ML"],"primary_cat":"cs.LG","authors_text":"Feihu Huang, Songcan Chen, Yuning Luo","submitted_at":"2026-05-19T09:56:27Z","abstract_excerpt":"Matrix-structured parameters frequently appear in many artificial intelligence models such as large language models. More recently, an efficient Muon optimizer is designed for matrix parameters of large-scale models, and shows markedly faster convergence than the vector-wise algorithms. Although some works have begun to study convergence properties (i.e., optimization error) of the Muon optimizer, its generalization properties (i.e., generalization error) is still not established. Thus, in this paper, we study generalization error of the Muon optimizer based on algorithmic stability and mathem"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2605.19619","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","primary_cat":"cs.LG","submitted_at":"2026-05-19T09:56:27Z","cross_cats_sorted":["cs.AI","math.OC","stat.ML"],"title_canon_sha256":"43cc75cbe9d6ce3706d06a5d3ad6ccad5d3b0a47e61cfdf0425097aa5c91486a","abstract_canon_sha256":"eeaad9eae53ac5af0e72e40645e1e8714cda047235279cbb349a76ba94ab4277"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-20T01:05:54.535605Z","signature_b64":"jVhzTFhSIfW0cN4ZX9/zjhLSbrdRXLVP4j50AaGr8mGmWNHdrOgCyugk7Yd6QHZcR7YCs6JwUaTf47vVe7EqCg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"6e12f7305855c008fdf5dc9a8125d0ee6876b6f58555da2e1fd58b34620e6090","last_reissued_at":"2026-05-20T01:05:54.534806Z","signature_status":"signed_v1","first_computed_at":"2026-05-20T01:05:54.534806Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"MiMuon: Mixed Muon Optimizer with Improved Generalization for Large Models","license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","headline":"","cross_cats":["cs.AI","math.OC","stat.ML"],"primary_cat":"cs.LG","authors_text":"Feihu Huang, Songcan Chen, Yuning Luo","submitted_at":"2026-05-19T09:56:27Z","abstract_excerpt":"Matrix-structured parameters frequently appear in many artificial intelligence models such as large language models. More recently, an efficient Muon optimizer is designed for matrix parameters of large-scale models, and shows markedly faster convergence than the vector-wise algorithms. Although some works have begun to study convergence properties (i.e., optimization error) of the Muon optimizer, its generalization properties (i.e., generalization error) is still not established. Thus, in this paper, we study generalization error of the Muon optimizer based on algorithmic stability and mathem"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.19619","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2605.19619/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2605.19619","created_at":"2026-05-20T01:05:54.534930+00:00"},{"alias_kind":"arxiv_version","alias_value":"2605.19619v1","created_at":"2026-05-20T01:05:54.534930+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.19619","created_at":"2026-05-20T01:05:54.534930+00:00"},{"alias_kind":"pith_short_12","alias_value":"NYJPOMCYKXAA","created_at":"2026-05-20T01:05:54.534930+00:00"},{"alias_kind":"pith_short_16","alias_value":"NYJPOMCYKXAAR7PV","created_at":"2026-05-20T01:05:54.534930+00:00"},{"alias_kind":"pith_short_8","alias_value":"NYJPOMCY","created_at":"2026-05-20T01:05:54.534930+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/NYJPOMCYKXAAR7PV3SNICJOQ5Z","json":"https://pith.science/pith/NYJPOMCYKXAAR7PV3SNICJOQ5Z.json","graph_json":"https://pith.science/api/pith-number/NYJPOMCYKXAAR7PV3SNICJOQ5Z/graph.json","events_json":"https://pith.science/api/pith-number/NYJPOMCYKXAAR7PV3SNICJOQ5Z/events.json","paper":"https://pith.science/paper/NYJPOMCY"},"agent_actions":{"view_html":"https://pith.science/pith/NYJPOMCYKXAAR7PV3SNICJOQ5Z","download_json":"https://pith.science/pith/NYJPOMCYKXAAR7PV3SNICJOQ5Z.json","view_paper":"https://pith.science/paper/NYJPOMCY","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2605.19619&json=true","fetch_graph":"https://pith.science/api/pith-number/NYJPOMCYKXAAR7PV3SNICJOQ5Z/graph.json","fetch_events":"https://pith.science/api/pith-number/NYJPOMCYKXAAR7PV3SNICJOQ5Z/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/NYJPOMCYKXAAR7PV3SNICJOQ5Z/action/timestamp_anchor","attest_storage":"https://pith.science/pith/NYJPOMCYKXAAR7PV3SNICJOQ5Z/action/storage_attestation","attest_author":"https://pith.science/pith/NYJPOMCYKXAAR7PV3SNICJOQ5Z/action/author_attestation","sign_citation":"https://pith.science/pith/NYJPOMCYKXAAR7PV3SNICJOQ5Z/action/citation_signature","submit_replication":"https://pith.science/pith/NYJPOMCYKXAAR7PV3SNICJOQ5Z/action/replication_record"}},"created_at":"2026-05-20T01:05:54.534930+00:00","updated_at":"2026-05-20T01:05:54.534930+00:00"}