{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2013:D52N3J2WQLSLTFYSDVZ62DOE5U","short_pith_number":"pith:D52N3J2W","schema_version":"1.0","canonical_sha256":"1f74dda75682e4b997121d73ed0dc4ed304383fcfd7ba0c7e141268daa67f819","source":{"kind":"arxiv","id":"1312.5663","version":2},"attestation_state":"computed","paper":{"title":"k-Sparse Autoencoders","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.LG","authors_text":"Alireza Makhzani, Brendan Frey","submitted_at":"2013-12-19T17:46:46Z","abstract_excerpt":"Recently, it has been observed that when representations are learnt in a way that encourages sparsity, improved performance is obtained on classification tasks. These methods involve combinations of activation functions, sampling steps and different kinds of penalties. To investigate the effectiveness of sparsity by itself, we propose the k-sparse autoencoder, which is an autoencoder with linear activation function, where in hidden layers only the k highest activities are kept. When applied to the MNIST and NORB datasets, we find that this method achieves better classification results than den"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1312.5663","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2013-12-19T17:46:46Z","cross_cats_sorted":[],"title_canon_sha256":"4b31b6389bbc189c137f711b0a90646b9c46035030721b97638393a4e9e071f6","abstract_canon_sha256":"d761cf60c29911e950d7157c46a4de12de45cc009e8746753d9dfec59b3a150b"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T02:55:49.627508Z","signature_b64":"G/yU2cKjIclFxytE1YgoMwx8vLmKh/bLEF08/qajqBjd0I7Lt/2FoqBwMtAjr8JFH6M2jekc8l9gLXrrS3tqAA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"1f74dda75682e4b997121d73ed0dc4ed304383fcfd7ba0c7e141268daa67f819","last_reissued_at":"2026-05-18T02:55:49.626799Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T02:55:49.626799Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"k-Sparse Autoencoders","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.LG","authors_text":"Alireza Makhzani, Brendan Frey","submitted_at":"2013-12-19T17:46:46Z","abstract_excerpt":"Recently, it has been observed that when representations are learnt in a way that encourages sparsity, improved performance is obtained on classification tasks. These methods involve combinations of activation functions, sampling steps and different kinds of penalties. To investigate the effectiveness of sparsity by itself, we propose the k-sparse autoencoder, which is an autoencoder with linear activation function, where in hidden layers only the k highest activities are kept. When applied to the MNIST and NORB datasets, we find that this method achieves better classification results than den"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1312.5663","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1312.5663","created_at":"2026-05-18T02:55:49.626909+00:00"},{"alias_kind":"arxiv_version","alias_value":"1312.5663v2","created_at":"2026-05-18T02:55:49.626909+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1312.5663","created_at":"2026-05-18T02:55:49.626909+00:00"},{"alias_kind":"pith_short_12","alias_value":"D52N3J2WQLSL","created_at":"2026-05-18T12:27:40.988391+00:00"},{"alias_kind":"pith_short_16","alias_value":"D52N3J2WQLSLTFYS","created_at":"2026-05-18T12:27:40.988391+00:00"},{"alias_kind":"pith_short_8","alias_value":"D52N3J2W","created_at":"2026-05-18T12:27:40.988391+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":21,"internal_anchor_count":8,"sample":[{"citing_arxiv_id":"2502.14888","citing_title":"Beyond Cross-Modal Alignment: Measuring and Leveraging Modality Gap in Vision-Language Models","ref_index":23,"is_internal_anchor":true},{"citing_arxiv_id":"2605.22472","citing_title":"Winner-Take-All bottlenecks enforce disentangled symbolic representations in multi-task learning","ref_index":29,"is_internal_anchor":true},{"citing_arxiv_id":"2605.12770","citing_title":"WriteSAE: Sparse Autoencoders for Recurrent State","ref_index":28,"is_internal_anchor":true},{"citing_arxiv_id":"2605.12770","citing_title":"WriteSAE: Sparse Autoencoders for Recurrent State","ref_index":86,"is_internal_anchor":true},{"citing_arxiv_id":"2605.12770","citing_title":"WriteSAE: Sparse Autoencoders for Recurrent State","ref_index":86,"is_internal_anchor":true},{"citing_arxiv_id":"2604.04946","citing_title":"Sparse Autoencoders as a Steering Basis for Phase Synchronization in Graph-Based CFD Surrogates","ref_index":6,"is_internal_anchor":true},{"citing_arxiv_id":"2605.12770","citing_title":"WriteSAE: Sparse Autoencoders for Recurrent State","ref_index":86,"is_internal_anchor":true},{"citing_arxiv_id":"2605.12809","citing_title":"Correcting Influence: Unboxing LLM Outputs with Orthogonal Latent Spaces","ref_index":45,"is_internal_anchor":true},{"citing_arxiv_id":"2604.02871","citing_title":"SPG: Sparse-Projected Guides with Sparse Autoencoders for Zero-Shot Anomaly Detection","ref_index":13,"is_internal_anchor":false},{"citing_arxiv_id":"2403.19647","citing_title":"Sparse Feature Circuits: Discovering and Editing Interpretable Causal Graphs in Language Models","ref_index":45,"is_internal_anchor":false},{"citing_arxiv_id":"2605.10971","citing_title":"Steering Without Breaking: Mechanistically Informed Interventions for Discrete Diffusion Language Models","ref_index":41,"is_internal_anchor":false},{"citing_arxiv_id":"2605.12122","citing_title":"Disentangled Sparse Representations for Concept-Separated Diffusion Unlearning","ref_index":28,"is_internal_anchor":false},{"citing_arxiv_id":"2605.12225","citing_title":"Mechanistic Interpretability of ASR models using Sparse Autoencoders","ref_index":8,"is_internal_anchor":false},{"citing_arxiv_id":"2406.04093","citing_title":"Scaling and evaluating sparse autoencoders","ref_index":36,"is_internal_anchor":false},{"citing_arxiv_id":"2604.26409","citing_title":"Sparsity as a Key: Unlocking New Insights from Latent Structures for Out-of-Distribution Detection","ref_index":26,"is_internal_anchor":false},{"citing_arxiv_id":"2605.08298","citing_title":"What Cohort INRs Encode and Where to Freeze Them","ref_index":36,"is_internal_anchor":false},{"citing_arxiv_id":"2605.08740","citing_title":"Causal Dimensionality of Transformer Representations: Measurement, Scaling, and Layer Structure","ref_index":14,"is_internal_anchor":false},{"citing_arxiv_id":"2511.13720","citing_title":"Back to Basics: Let Denoising Generative Models Denoise","ref_index":41,"is_internal_anchor":false},{"citing_arxiv_id":"2605.01829","citing_title":"GeoSAE: Geometric Prior-Guided Layer-Wise Sparse Autoencoder Annotation of Brain MRI Foundation Models","ref_index":30,"is_internal_anchor":false},{"citing_arxiv_id":"2604.08846","citing_title":"Dictionary-Aligned Concept Control for Safeguarding Multimodal LLMs","ref_index":61,"is_internal_anchor":false},{"citing_arxiv_id":"2604.14925","citing_title":"Improving Sparse Autoencoder with Dynamic Attention","ref_index":37,"is_internal_anchor":false}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/D52N3J2WQLSLTFYSDVZ62DOE5U","json":"https://pith.science/pith/D52N3J2WQLSLTFYSDVZ62DOE5U.json","graph_json":"https://pith.science/api/pith-number/D52N3J2WQLSLTFYSDVZ62DOE5U/graph.json","events_json":"https://pith.science/api/pith-number/D52N3J2WQLSLTFYSDVZ62DOE5U/events.json","paper":"https://pith.science/paper/D52N3J2W"},"agent_actions":{"view_html":"https://pith.science/pith/D52N3J2WQLSLTFYSDVZ62DOE5U","download_json":"https://pith.science/pith/D52N3J2WQLSLTFYSDVZ62DOE5U.json","view_paper":"https://pith.science/paper/D52N3J2W","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1312.5663&json=true","fetch_graph":"https://pith.science/api/pith-number/D52N3J2WQLSLTFYSDVZ62DOE5U/graph.json","fetch_events":"https://pith.science/api/pith-number/D52N3J2WQLSLTFYSDVZ62DOE5U/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/D52N3J2WQLSLTFYSDVZ62DOE5U/action/timestamp_anchor","attest_storage":"https://pith.science/pith/D52N3J2WQLSLTFYSDVZ62DOE5U/action/storage_attestation","attest_author":"https://pith.science/pith/D52N3J2WQLSLTFYSDVZ62DOE5U/action/author_attestation","sign_citation":"https://pith.science/pith/D52N3J2WQLSLTFYSDVZ62DOE5U/action/citation_signature","submit_replication":"https://pith.science/pith/D52N3J2WQLSLTFYSDVZ62DOE5U/action/replication_record"}},"created_at":"2026-05-18T02:55:49.626909+00:00","updated_at":"2026-05-18T02:55:49.626909+00:00"}