{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:GSIZEKHALY27KEG3NQ6BC7RHVZ","short_pith_number":"pith:GSIZEKHA","schema_version":"1.0","canonical_sha256":"34919228e05e35f510db6c3c117e27ae76efbea87f3b6ad01c127634d74bb948","source":{"kind":"arxiv","id":"2605.23893","version":1},"attestation_state":"computed","paper":{"title":"Complete-muE: Optimal Hyperparameter Transfer and Scaling for MoE Models","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.LG","authors_text":"Hongwu Peng, Jianming Zhang, Ohiremen Dibua, Yan Kang, Yifan Gong, Yuanjun Xiong","submitted_at":"2026-05-22T17:56:13Z","abstract_excerpt":"We propose Complete-muE, a framework which targets hyperparameter transfer across dense FFN and any Mixture-of-Experts (MoE) setups in transformer blocks. Existing tools such as $\\mu$P (requires fixed architectue) or SDE (requires fixed per-step token count) cannot directly solve the hyperparameter transfer problem in MoE setups because Dense to MoE transfer or MoE total experts scaling changes both architecture and tokens per expert. Complete-muE solves this challenge with a two-bridge system: Bridge~I maps between dense FFN and Dense MoE by active-width $\\mu$P with a normalized router scale."},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2605.23893","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2026-05-22T17:56:13Z","cross_cats_sorted":[],"title_canon_sha256":"e98777be7607a3a21c0d825182fbe378aacdd30664c0fa266086dc6e2772b6ec","abstract_canon_sha256":"efd04aa63997d5817938ddf3c0f23c2839a2811a17cdd725d07b8ee20005b33c"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-25T02:02:38.179347Z","signature_b64":"MzloU1xVTG5gQ/gDTa5DhoABtT6BLRARa1alBcbi7Pry4z9ebz+2mKPBGOSDShCPESgMuHkrPEFRSO75s+C5Ag==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"34919228e05e35f510db6c3c117e27ae76efbea87f3b6ad01c127634d74bb948","last_reissued_at":"2026-05-25T02:02:38.178633Z","signature_status":"signed_v1","first_computed_at":"2026-05-25T02:02:38.178633Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Complete-muE: Optimal Hyperparameter Transfer and Scaling for MoE Models","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.LG","authors_text":"Hongwu Peng, Jianming Zhang, Ohiremen Dibua, Yan Kang, Yifan Gong, Yuanjun Xiong","submitted_at":"2026-05-22T17:56:13Z","abstract_excerpt":"We propose Complete-muE, a framework which targets hyperparameter transfer across dense FFN and any Mixture-of-Experts (MoE) setups in transformer blocks. Existing tools such as $\\mu$P (requires fixed architectue) or SDE (requires fixed per-step token count) cannot directly solve the hyperparameter transfer problem in MoE setups because Dense to MoE transfer or MoE total experts scaling changes both architecture and tokens per expert. Complete-muE solves this challenge with a two-bridge system: Bridge~I maps between dense FFN and Dense MoE by active-width $\\mu$P with a normalized router scale."},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.23893","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2605.23893/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2605.23893","created_at":"2026-05-25T02:02:38.178763+00:00"},{"alias_kind":"arxiv_version","alias_value":"2605.23893v1","created_at":"2026-05-25T02:02:38.178763+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.23893","created_at":"2026-05-25T02:02:38.178763+00:00"},{"alias_kind":"pith_short_12","alias_value":"GSIZEKHALY27","created_at":"2026-05-25T02:02:38.178763+00:00"},{"alias_kind":"pith_short_16","alias_value":"GSIZEKHALY27KEG3","created_at":"2026-05-25T02:02:38.178763+00:00"},{"alias_kind":"pith_short_8","alias_value":"GSIZEKHA","created_at":"2026-05-25T02:02:38.178763+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/GSIZEKHALY27KEG3NQ6BC7RHVZ","json":"https://pith.science/pith/GSIZEKHALY27KEG3NQ6BC7RHVZ.json","graph_json":"https://pith.science/api/pith-number/GSIZEKHALY27KEG3NQ6BC7RHVZ/graph.json","events_json":"https://pith.science/api/pith-number/GSIZEKHALY27KEG3NQ6BC7RHVZ/events.json","paper":"https://pith.science/paper/GSIZEKHA"},"agent_actions":{"view_html":"https://pith.science/pith/GSIZEKHALY27KEG3NQ6BC7RHVZ","download_json":"https://pith.science/pith/GSIZEKHALY27KEG3NQ6BC7RHVZ.json","view_paper":"https://pith.science/paper/GSIZEKHA","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2605.23893&json=true","fetch_graph":"https://pith.science/api/pith-number/GSIZEKHALY27KEG3NQ6BC7RHVZ/graph.json","fetch_events":"https://pith.science/api/pith-number/GSIZEKHALY27KEG3NQ6BC7RHVZ/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/GSIZEKHALY27KEG3NQ6BC7RHVZ/action/timestamp_anchor","attest_storage":"https://pith.science/pith/GSIZEKHALY27KEG3NQ6BC7RHVZ/action/storage_attestation","attest_author":"https://pith.science/pith/GSIZEKHALY27KEG3NQ6BC7RHVZ/action/author_attestation","sign_citation":"https://pith.science/pith/GSIZEKHALY27KEG3NQ6BC7RHVZ/action/citation_signature","submit_replication":"https://pith.science/pith/GSIZEKHALY27KEG3NQ6BC7RHVZ/action/replication_record"}},"created_at":"2026-05-25T02:02:38.178763+00:00","updated_at":"2026-05-25T02:02:38.178763+00:00"}