{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:XEHWE6FYAGDEIWSF6KOVEOECIX","short_pith_number":"pith:XEHWE6FY","schema_version":"1.0","canonical_sha256":"b90f6278b80186445a45f29d52388245d67df25cc5fd526d632a46cb81569983","source":{"kind":"arxiv","id":"2606.00651","version":1},"attestation_state":"computed","paper":{"title":"MESA: Improving MoE Safety Alignment via Decentralized Expertise","license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","headline":"","cross_cats":["cs.AI","cs.CL"],"primary_cat":"cs.LG","authors_text":"Hui Xue, Ranjie Duan, Teng Li, Xingjun Ma, Xingxing Wei, Yao Huang, Yichi Zhang, Yitong Sun","submitted_at":"2026-05-30T09:54:38Z","abstract_excerpt":"Mixture-of-Experts (MoE) architectures scale Large Language Models (LLMs) efficiently, enabling greater capacity with reduced computational cost by dynamically routing inputs to relevant experts, yet introduce a critical vulnerability: Safety Sparsity, where safety capabilities concentrate in few experts, making them susceptible to adversarial bypassing. Meanwhile, conventional alignment methods uniformly adapt all parameters, ignoring their functional differences and inadvertently degrading performances. To address these challenges, we propose MESA (MoE Safety Alignment), a targeted alignment"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2606.00651","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","primary_cat":"cs.LG","submitted_at":"2026-05-30T09:54:38Z","cross_cats_sorted":["cs.AI","cs.CL"],"title_canon_sha256":"a9d6e18c13e1ce35c13ba13640c0333386ce3308e3ba776fa2c3273ac3141fb4","abstract_canon_sha256":"1cdf80f722df44300bf983f03070ab33e2c7a8c34650a2ad56c77e47d2ff7272"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-02T01:04:00.978918Z","signature_b64":"/p/c9RxwGl0h6Z1/qr3s2lUKWSc1pQsX/UvqHW2TY1MOpHOz5HxBFnbbDXqUKfLVLXSJa5ftQ1AHUkc/gjjbCg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"b90f6278b80186445a45f29d52388245d67df25cc5fd526d632a46cb81569983","last_reissued_at":"2026-06-02T01:04:00.978504Z","signature_status":"signed_v1","first_computed_at":"2026-06-02T01:04:00.978504Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"MESA: Improving MoE Safety Alignment via Decentralized Expertise","license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","headline":"","cross_cats":["cs.AI","cs.CL"],"primary_cat":"cs.LG","authors_text":"Hui Xue, Ranjie Duan, Teng Li, Xingjun Ma, Xingxing Wei, Yao Huang, Yichi Zhang, Yitong Sun","submitted_at":"2026-05-30T09:54:38Z","abstract_excerpt":"Mixture-of-Experts (MoE) architectures scale Large Language Models (LLMs) efficiently, enabling greater capacity with reduced computational cost by dynamically routing inputs to relevant experts, yet introduce a critical vulnerability: Safety Sparsity, where safety capabilities concentrate in few experts, making them susceptible to adversarial bypassing. Meanwhile, conventional alignment methods uniformly adapt all parameters, ignoring their functional differences and inadvertently degrading performances. To address these challenges, we propose MESA (MoE Safety Alignment), a targeted alignment"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.00651","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2606.00651/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2606.00651","created_at":"2026-06-02T01:04:00.978559+00:00"},{"alias_kind":"arxiv_version","alias_value":"2606.00651v1","created_at":"2026-06-02T01:04:00.978559+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.00651","created_at":"2026-06-02T01:04:00.978559+00:00"},{"alias_kind":"pith_short_12","alias_value":"XEHWE6FYAGDE","created_at":"2026-06-02T01:04:00.978559+00:00"},{"alias_kind":"pith_short_16","alias_value":"XEHWE6FYAGDEIWSF","created_at":"2026-06-02T01:04:00.978559+00:00"},{"alias_kind":"pith_short_8","alias_value":"XEHWE6FY","created_at":"2026-06-02T01:04:00.978559+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/XEHWE6FYAGDEIWSF6KOVEOECIX","json":"https://pith.science/pith/XEHWE6FYAGDEIWSF6KOVEOECIX.json","graph_json":"https://pith.science/api/pith-number/XEHWE6FYAGDEIWSF6KOVEOECIX/graph.json","events_json":"https://pith.science/api/pith-number/XEHWE6FYAGDEIWSF6KOVEOECIX/events.json","paper":"https://pith.science/paper/XEHWE6FY"},"agent_actions":{"view_html":"https://pith.science/pith/XEHWE6FYAGDEIWSF6KOVEOECIX","download_json":"https://pith.science/pith/XEHWE6FYAGDEIWSF6KOVEOECIX.json","view_paper":"https://pith.science/paper/XEHWE6FY","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2606.00651&json=true","fetch_graph":"https://pith.science/api/pith-number/XEHWE6FYAGDEIWSF6KOVEOECIX/graph.json","fetch_events":"https://pith.science/api/pith-number/XEHWE6FYAGDEIWSF6KOVEOECIX/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/XEHWE6FYAGDEIWSF6KOVEOECIX/action/timestamp_anchor","attest_storage":"https://pith.science/pith/XEHWE6FYAGDEIWSF6KOVEOECIX/action/storage_attestation","attest_author":"https://pith.science/pith/XEHWE6FYAGDEIWSF6KOVEOECIX/action/author_attestation","sign_citation":"https://pith.science/pith/XEHWE6FYAGDEIWSF6KOVEOECIX/action/citation_signature","submit_replication":"https://pith.science/pith/XEHWE6FYAGDEIWSF6KOVEOECIX/action/replication_record"}},"created_at":"2026-06-02T01:04:00.978559+00:00","updated_at":"2026-06-02T01:04:00.978559+00:00"}