{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:AGW62PWN7WJWR7RHYFU6YCTQEA","short_pith_number":"pith:AGW62PWN","schema_version":"1.0","canonical_sha256":"01aded3ecdfd9368fe27c169ec0a70201549e666d84b46179b94754cc0701943","source":{"kind":"arxiv","id":"2605.18106","version":1},"attestation_state":"computed","paper":{"title":"Symmetry-Compatible Principle for Optimizer Design: Embeddings, LM Heads, SwiGLU MLPs, and MoE Routers","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.AI","cs.LG","stat.ML"],"primary_cat":"math.OC","authors_text":"Tim Tsz-Kit Lau, Weijie Su","submitted_at":"2026-05-18T09:17:26Z","abstract_excerpt":"A striking geometric disparity has long persisted in the practice of deep learning. While modern neural network architectures naturally exhibit rich symmetry and equivariance properties, popular optimizers such as Adam and its variants operate inherently coordinate-wise, rendering them unable to respect the equivariance structures of the parameter space. We address this disparity by introducing a symmetry-compatible principle for optimizer design: the gradient update rule should be equivariant under the symmetry group acting on the corresponding weight block. Following this principle, we first"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2605.18106","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"math.OC","submitted_at":"2026-05-18T09:17:26Z","cross_cats_sorted":["cs.AI","cs.LG","stat.ML"],"title_canon_sha256":"b73a3b7f9c2a5fe09a2df9d25b75825b77f0d9ff376cc6bcd44b38ccb1086189","abstract_canon_sha256":"4d1e837dc255c73e9aabdd747dd4186a6c3a8e9419359216da526552cb1b3a46"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-20T00:05:16.257962Z","signature_b64":"pKr+9vpv5LD0x9dpNzqlMj4duj5ljb6aGYA73jozrvtsYtI/aPY3WZNcwdUl/CQf+LVBzlZ2d/5XBZmXNrKUDA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"01aded3ecdfd9368fe27c169ec0a70201549e666d84b46179b94754cc0701943","last_reissued_at":"2026-05-20T00:05:16.256873Z","signature_status":"signed_v1","first_computed_at":"2026-05-20T00:05:16.256873Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Symmetry-Compatible Principle for Optimizer Design: Embeddings, LM Heads, SwiGLU MLPs, and MoE Routers","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.AI","cs.LG","stat.ML"],"primary_cat":"math.OC","authors_text":"Tim Tsz-Kit Lau, Weijie Su","submitted_at":"2026-05-18T09:17:26Z","abstract_excerpt":"A striking geometric disparity has long persisted in the practice of deep learning. While modern neural network architectures naturally exhibit rich symmetry and equivariance properties, popular optimizers such as Adam and its variants operate inherently coordinate-wise, rendering them unable to respect the equivariance structures of the parameter space. We address this disparity by introducing a symmetry-compatible principle for optimizer design: the gradient update rule should be equivariant under the symmetry group acting on the corresponding weight block. Following this principle, we first"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.18106","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2605.18106/integrity.json","findings":[],"available":true,"detectors_run":[{"name":"claim_evidence","ran_at":"2026-05-19T23:41:59.174667Z","status":"completed","version":"1.0.0","findings_count":0},{"name":"ai_meta_artifact","ran_at":"2026-05-19T23:33:35.416092Z","status":"skipped","version":"1.0.0","findings_count":0}],"snapshot_sha256":"ce35951ed1aa0671d11f562f63a7e4868b82366e3005f043a5d8a8a5c5cfb31c"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2605.18106","created_at":"2026-05-20T00:05:16.257007+00:00"},{"alias_kind":"arxiv_version","alias_value":"2605.18106v1","created_at":"2026-05-20T00:05:16.257007+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.18106","created_at":"2026-05-20T00:05:16.257007+00:00"},{"alias_kind":"pith_short_12","alias_value":"AGW62PWN7WJW","created_at":"2026-05-20T00:05:16.257007+00:00"},{"alias_kind":"pith_short_16","alias_value":"AGW62PWN7WJWR7RH","created_at":"2026-05-20T00:05:16.257007+00:00"},{"alias_kind":"pith_short_8","alias_value":"AGW62PWN","created_at":"2026-05-20T00:05:16.257007+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/AGW62PWN7WJWR7RHYFU6YCTQEA","json":"https://pith.science/pith/AGW62PWN7WJWR7RHYFU6YCTQEA.json","graph_json":"https://pith.science/api/pith-number/AGW62PWN7WJWR7RHYFU6YCTQEA/graph.json","events_json":"https://pith.science/api/pith-number/AGW62PWN7WJWR7RHYFU6YCTQEA/events.json","paper":"https://pith.science/paper/AGW62PWN"},"agent_actions":{"view_html":"https://pith.science/pith/AGW62PWN7WJWR7RHYFU6YCTQEA","download_json":"https://pith.science/pith/AGW62PWN7WJWR7RHYFU6YCTQEA.json","view_paper":"https://pith.science/paper/AGW62PWN","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2605.18106&json=true","fetch_graph":"https://pith.science/api/pith-number/AGW62PWN7WJWR7RHYFU6YCTQEA/graph.json","fetch_events":"https://pith.science/api/pith-number/AGW62PWN7WJWR7RHYFU6YCTQEA/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/AGW62PWN7WJWR7RHYFU6YCTQEA/action/timestamp_anchor","attest_storage":"https://pith.science/pith/AGW62PWN7WJWR7RHYFU6YCTQEA/action/storage_attestation","attest_author":"https://pith.science/pith/AGW62PWN7WJWR7RHYFU6YCTQEA/action/author_attestation","sign_citation":"https://pith.science/pith/AGW62PWN7WJWR7RHYFU6YCTQEA/action/citation_signature","submit_replication":"https://pith.science/pith/AGW62PWN7WJWR7RHYFU6YCTQEA/action/replication_record"}},"created_at":"2026-05-20T00:05:16.257007+00:00","updated_at":"2026-05-20T00:05:16.257007+00:00"}