{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:PO2EHLWZIGAR6ZIZVATXFAOW6A","short_pith_number":"pith:PO2EHLWZ","schema_version":"1.0","canonical_sha256":"7bb443aed941811f6519a8277281d6f023af10c5e36ce422b95fe81aa7f8233c","source":{"kind":"arxiv","id":"2603.03335","version":2},"attestation_state":"computed","paper":{"title":"Compressed Sensing for Capability Localization in Large Language Models","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Anna Bair, J. Zico Kolter, Mingjie Sun, Yixuan Even Xu","submitted_at":"2026-02-11T16:06:59Z","abstract_excerpt":"Large language models (LLMs) exhibit a wide range of capabilities, including mathematical reasoning, code generation, and linguistic behaviors. We show that Transformer architectures contain small subsets of attention heads that are necessary for certain capabilities. Zeroing out as few as five task-specific heads can degrade performance by up to $60\\%$ on standard benchmarks measuring the capability of interest, while largely preserving performance on unrelated tasks. We introduce a compressed sensing-based method that exploits the sparsity of these heads to identify them via strategic knocko"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2603.03335","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-02-11T16:06:59Z","cross_cats_sorted":[],"title_canon_sha256":"9ea32680f7a0f62414d893cc9fe9e7b785bdd4276a2fece3250eb6b2f5535750","abstract_canon_sha256":"a0595b426aa6d128dde3d3e274ac161a0ef16a407842f50e06da47e988edb0b7"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-30T02:18:07.368640Z","signature_b64":"ukUx2ONFzeTEQHsNSkugPEwrzTB+hktMMbKnVQunrpRWfg+vqxQ1CwAtyK9mqzfV4PQSXLUosse2HoM7uttkDw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"7bb443aed941811f6519a8277281d6f023af10c5e36ce422b95fe81aa7f8233c","last_reissued_at":"2026-06-30T02:18:07.367988Z","signature_status":"signed_v1","first_computed_at":"2026-06-30T02:18:07.367988Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Compressed Sensing for Capability Localization in Large Language Models","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Anna Bair, J. Zico Kolter, Mingjie Sun, Yixuan Even Xu","submitted_at":"2026-02-11T16:06:59Z","abstract_excerpt":"Large language models (LLMs) exhibit a wide range of capabilities, including mathematical reasoning, code generation, and linguistic behaviors. We show that Transformer architectures contain small subsets of attention heads that are necessary for certain capabilities. Zeroing out as few as five task-specific heads can degrade performance by up to $60\\%$ on standard benchmarks measuring the capability of interest, while largely preserving performance on unrelated tasks. We introduce a compressed sensing-based method that exploits the sparsity of these heads to identify them via strategic knocko"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2603.03335","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2603.03335/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2603.03335","created_at":"2026-06-30T02:18:07.368069+00:00"},{"alias_kind":"arxiv_version","alias_value":"2603.03335v2","created_at":"2026-06-30T02:18:07.368069+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2603.03335","created_at":"2026-06-30T02:18:07.368069+00:00"},{"alias_kind":"pith_short_12","alias_value":"PO2EHLWZIGAR","created_at":"2026-06-30T02:18:07.368069+00:00"},{"alias_kind":"pith_short_16","alias_value":"PO2EHLWZIGAR6ZIZ","created_at":"2026-06-30T02:18:07.368069+00:00"},{"alias_kind":"pith_short_8","alias_value":"PO2EHLWZ","created_at":"2026-06-30T02:18:07.368069+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":2,"internal_anchor_count":2,"sample":[{"citing_arxiv_id":"2606.13594","citing_title":"See What I See, Know What I Think: Dense Latent Communication Across Heterogeneous Agents","ref_index":22,"is_internal_anchor":true},{"citing_arxiv_id":"2606.08292","citing_title":"Ablation-Reversible Heads Don't Transfer: A Stress Test for Mechanistic Role Claims in Transformers","ref_index":2,"is_internal_anchor":true}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/PO2EHLWZIGAR6ZIZVATXFAOW6A","json":"https://pith.science/pith/PO2EHLWZIGAR6ZIZVATXFAOW6A.json","graph_json":"https://pith.science/api/pith-number/PO2EHLWZIGAR6ZIZVATXFAOW6A/graph.json","events_json":"https://pith.science/api/pith-number/PO2EHLWZIGAR6ZIZVATXFAOW6A/events.json","paper":"https://pith.science/paper/PO2EHLWZ"},"agent_actions":{"view_html":"https://pith.science/pith/PO2EHLWZIGAR6ZIZVATXFAOW6A","download_json":"https://pith.science/pith/PO2EHLWZIGAR6ZIZVATXFAOW6A.json","view_paper":"https://pith.science/paper/PO2EHLWZ","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2603.03335&json=true","fetch_graph":"https://pith.science/api/pith-number/PO2EHLWZIGAR6ZIZVATXFAOW6A/graph.json","fetch_events":"https://pith.science/api/pith-number/PO2EHLWZIGAR6ZIZVATXFAOW6A/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/PO2EHLWZIGAR6ZIZVATXFAOW6A/action/timestamp_anchor","attest_storage":"https://pith.science/pith/PO2EHLWZIGAR6ZIZVATXFAOW6A/action/storage_attestation","attest_author":"https://pith.science/pith/PO2EHLWZIGAR6ZIZVATXFAOW6A/action/author_attestation","sign_citation":"https://pith.science/pith/PO2EHLWZIGAR6ZIZVATXFAOW6A/action/citation_signature","submit_replication":"https://pith.science/pith/PO2EHLWZIGAR6ZIZVATXFAOW6A/action/replication_record"}},"created_at":"2026-06-30T02:18:07.368069+00:00","updated_at":"2026-06-30T02:18:07.368069+00:00"}