{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2025:3BTXRKHD5NCIQVAT4RR4R2Z64N","short_pith_number":"pith:3BTXRKHD","schema_version":"1.0","canonical_sha256":"d86778a8e3eb44885413e463c8eb3ee370f742e3c581f18ec9c1607538aba33e","source":{"kind":"arxiv","id":"2510.13999","version":3},"attestation_state":"computed","paper":{"title":"REAP the Experts: Why Pruning Prevails for One-Shot MoE compression","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.LG","authors_text":"Ivan Lazarevich, Mike Lasby, Nish Sinnadurai, Sean Lie, Vithursan Thangarasa, Yani Ioannou","submitted_at":"2025-10-15T18:29:28Z","abstract_excerpt":"Sparsely-activated Mixture-of-Experts (SMoE) models offer efficient pre-training and low latency but their large parameter counts create significant memory overhead, motivating research into expert compression. Contrary to recent findings favouring expert merging on discriminative benchmarks, we find that expert pruning is a superior strategy for generative tasks. We demonstrate that existing merging techniques introduce an irreducible error due to the loss of fine-grained routing control over experts. Leveraging this insight, we propose Router-weighted Expert Activation Pruning (REAP), a nove"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2510.13999","kind":"arxiv","version":3},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2025-10-15T18:29:28Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"bd76ec03f536c8c6071ed3021e92a8a51534f7eb9957115796966891f84757f9","abstract_canon_sha256":"c648eea18869308aceb0f59a3f7b30cbeb8f8b831db962e0e4a84e8596b7cafc"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T03:09:33.856482Z","signature_b64":"mcQAde8sp4FjD/fczhYqR41TsxPnq7JKkoL0BioTjAE+Lzr9FxXb6ja73jTKNSZgjJnQ1uy0BmFbGWGvGuQOAw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"d86778a8e3eb44885413e463c8eb3ee370f742e3c581f18ec9c1607538aba33e","last_reissued_at":"2026-05-18T03:09:33.855730Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T03:09:33.855730Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"REAP the Experts: Why Pruning Prevails for One-Shot MoE compression","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.LG","authors_text":"Ivan Lazarevich, Mike Lasby, Nish Sinnadurai, Sean Lie, Vithursan Thangarasa, Yani Ioannou","submitted_at":"2025-10-15T18:29:28Z","abstract_excerpt":"Sparsely-activated Mixture-of-Experts (SMoE) models offer efficient pre-training and low latency but their large parameter counts create significant memory overhead, motivating research into expert compression. Contrary to recent findings favouring expert merging on discriminative benchmarks, we find that expert pruning is a superior strategy for generative tasks. We demonstrate that existing merging techniques introduce an irreducible error due to the loss of fine-grained routing control over experts. Leveraging this insight, we propose Router-weighted Expert Activation Pruning (REAP), a nove"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2510.13999","kind":"arxiv","version":3},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2510.13999","created_at":"2026-05-18T03:09:33.855859+00:00"},{"alias_kind":"arxiv_version","alias_value":"2510.13999v3","created_at":"2026-05-18T03:09:33.855859+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2510.13999","created_at":"2026-05-18T03:09:33.855859+00:00"},{"alias_kind":"pith_short_12","alias_value":"3BTXRKHD5NCI","created_at":"2026-05-18T12:33:37.589309+00:00"},{"alias_kind":"pith_short_16","alias_value":"3BTXRKHD5NCIQVAT","created_at":"2026-05-18T12:33:37.589309+00:00"},{"alias_kind":"pith_short_8","alias_value":"3BTXRKHD","created_at":"2026-05-18T12:33:37.589309+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":6,"internal_anchor_count":6,"sample":[{"citing_arxiv_id":"2605.08738","citing_title":"SlimQwen: Exploring the Pruning and Distillation in Large MoE Model Pre-training","ref_index":40,"is_internal_anchor":true},{"citing_arxiv_id":"2603.06003","citing_title":"EvoESAP: Non-Uniform Expert Pruning for Sparse MoE","ref_index":26,"is_internal_anchor":true},{"citing_arxiv_id":"2605.13997","citing_title":"HodgeCover: Higher-Order Topological Coverage Drives Compression of Sparse Mixture-of-Experts","ref_index":31,"is_internal_anchor":true},{"citing_arxiv_id":"2605.08738","citing_title":"SlimQwen: Exploring the Pruning and Distillation in Large MoE Model Pre-training","ref_index":40,"is_internal_anchor":true},{"citing_arxiv_id":"2605.00649","citing_title":"Model Compression with Exact Budget Constraints via Riemannian Manifolds","ref_index":4,"is_internal_anchor":true},{"citing_arxiv_id":"2605.07182","citing_title":"Star Elastic: Many-in-One Reasoning LLMs with Efficient Budget Control","ref_index":17,"is_internal_anchor":true}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/3BTXRKHD5NCIQVAT4RR4R2Z64N","json":"https://pith.science/pith/3BTXRKHD5NCIQVAT4RR4R2Z64N.json","graph_json":"https://pith.science/api/pith-number/3BTXRKHD5NCIQVAT4RR4R2Z64N/graph.json","events_json":"https://pith.science/api/pith-number/3BTXRKHD5NCIQVAT4RR4R2Z64N/events.json","paper":"https://pith.science/paper/3BTXRKHD"},"agent_actions":{"view_html":"https://pith.science/pith/3BTXRKHD5NCIQVAT4RR4R2Z64N","download_json":"https://pith.science/pith/3BTXRKHD5NCIQVAT4RR4R2Z64N.json","view_paper":"https://pith.science/paper/3BTXRKHD","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2510.13999&json=true","fetch_graph":"https://pith.science/api/pith-number/3BTXRKHD5NCIQVAT4RR4R2Z64N/graph.json","fetch_events":"https://pith.science/api/pith-number/3BTXRKHD5NCIQVAT4RR4R2Z64N/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/3BTXRKHD5NCIQVAT4RR4R2Z64N/action/timestamp_anchor","attest_storage":"https://pith.science/pith/3BTXRKHD5NCIQVAT4RR4R2Z64N/action/storage_attestation","attest_author":"https://pith.science/pith/3BTXRKHD5NCIQVAT4RR4R2Z64N/action/author_attestation","sign_citation":"https://pith.science/pith/3BTXRKHD5NCIQVAT4RR4R2Z64N/action/citation_signature","submit_replication":"https://pith.science/pith/3BTXRKHD5NCIQVAT4RR4R2Z64N/action/replication_record"}},"created_at":"2026-05-18T03:09:33.855859+00:00","updated_at":"2026-05-18T03:09:33.855859+00:00"}