{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:75T7H6BVKLX2X7KPYCGRY4USAC","short_pith_number":"pith:75T7H6BV","schema_version":"1.0","canonical_sha256":"ff67f3f83552efabfd4fc08d1c729200851a441688b80aa5cd932bb6e8a70dbc","source":{"kind":"arxiv","id":"2605.25612","version":1},"attestation_state":"computed","paper":{"title":"Towards the Connection between Activation Sparsity and Flat Minima","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.LG","authors_text":"Jian Zhang, Lei Qi, Yang Gao, Yinghuan Shi, Ze Peng","submitted_at":"2026-05-25T09:10:40Z","abstract_excerpt":"The observation that activation sparsity emerges in MLP blocks of standardly trained Transformers offers an opportunity to drastically reduce computation costs without sacrificing performance. To theoretically explain this phenomenon, existing works have shown that activation sparsity does not result from the data properties or data fitting but from the implicit bias of the training process. However, these connections are obtained with strong assumptions, which cannot be applied to deep models standardly trained with a large number of steps. Different from these works, we find that the flatnes"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2605.25612","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-05-25T09:10:40Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"8e540298417cbebf33070bdd0f44dbec78357ad63138bfada1770212da995b12","abstract_canon_sha256":"7f128a52594a7fc083f10126e2dfd33f89c6ad25ba298b15e77fac71be540961"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-26T02:04:46.467688Z","signature_b64":"no1tg5PhK/Gn2Xw/sewa5v7LUGy5tgFRNpeAHQr/j1D3sTd15YOLUGY6tKWr7V4Z6PDoz+kJ4p/6n5jjiS4SDw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"ff67f3f83552efabfd4fc08d1c729200851a441688b80aa5cd932bb6e8a70dbc","last_reissued_at":"2026-05-26T02:04:46.466857Z","signature_status":"signed_v1","first_computed_at":"2026-05-26T02:04:46.466857Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Towards the Connection between Activation Sparsity and Flat Minima","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.LG","authors_text":"Jian Zhang, Lei Qi, Yang Gao, Yinghuan Shi, Ze Peng","submitted_at":"2026-05-25T09:10:40Z","abstract_excerpt":"The observation that activation sparsity emerges in MLP blocks of standardly trained Transformers offers an opportunity to drastically reduce computation costs without sacrificing performance. To theoretically explain this phenomenon, existing works have shown that activation sparsity does not result from the data properties or data fitting but from the implicit bias of the training process. However, these connections are obtained with strong assumptions, which cannot be applied to deep models standardly trained with a large number of steps. Different from these works, we find that the flatnes"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.25612","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2605.25612/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2605.25612","created_at":"2026-05-26T02:04:46.466998+00:00"},{"alias_kind":"arxiv_version","alias_value":"2605.25612v1","created_at":"2026-05-26T02:04:46.466998+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.25612","created_at":"2026-05-26T02:04:46.466998+00:00"},{"alias_kind":"pith_short_12","alias_value":"75T7H6BVKLX2","created_at":"2026-05-26T02:04:46.466998+00:00"},{"alias_kind":"pith_short_16","alias_value":"75T7H6BVKLX2X7KP","created_at":"2026-05-26T02:04:46.466998+00:00"},{"alias_kind":"pith_short_8","alias_value":"75T7H6BV","created_at":"2026-05-26T02:04:46.466998+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/75T7H6BVKLX2X7KPYCGRY4USAC","json":"https://pith.science/pith/75T7H6BVKLX2X7KPYCGRY4USAC.json","graph_json":"https://pith.science/api/pith-number/75T7H6BVKLX2X7KPYCGRY4USAC/graph.json","events_json":"https://pith.science/api/pith-number/75T7H6BVKLX2X7KPYCGRY4USAC/events.json","paper":"https://pith.science/paper/75T7H6BV"},"agent_actions":{"view_html":"https://pith.science/pith/75T7H6BVKLX2X7KPYCGRY4USAC","download_json":"https://pith.science/pith/75T7H6BVKLX2X7KPYCGRY4USAC.json","view_paper":"https://pith.science/paper/75T7H6BV","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2605.25612&json=true","fetch_graph":"https://pith.science/api/pith-number/75T7H6BVKLX2X7KPYCGRY4USAC/graph.json","fetch_events":"https://pith.science/api/pith-number/75T7H6BVKLX2X7KPYCGRY4USAC/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/75T7H6BVKLX2X7KPYCGRY4USAC/action/timestamp_anchor","attest_storage":"https://pith.science/pith/75T7H6BVKLX2X7KPYCGRY4USAC/action/storage_attestation","attest_author":"https://pith.science/pith/75T7H6BVKLX2X7KPYCGRY4USAC/action/author_attestation","sign_citation":"https://pith.science/pith/75T7H6BVKLX2X7KPYCGRY4USAC/action/citation_signature","submit_replication":"https://pith.science/pith/75T7H6BVKLX2X7KPYCGRY4USAC/action/replication_record"}},"created_at":"2026-05-26T02:04:46.466998+00:00","updated_at":"2026-05-26T02:04:46.466998+00:00"}