{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2023:4GZLISFORWYFPHMHLGN4SDP2P6","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"21d2e0a1f4ee7261ee53ba1f358b8ab53995d1926f8e62a563a00647a1530f9c","cross_cats_sorted":["cs.AI"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2023-01-12T18:56:49Z","title_canon_sha256":"2b8da6a68b450756d12923b1c2f434a107b1e93fe063bab567c85bdd1c56c5f9"},"schema_version":"1.0","source":{"id":"2301.05217","kind":"arxiv","version":3}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2301.05217","created_at":"2026-05-17T23:39:21Z"},{"alias_kind":"arxiv_version","alias_value":"2301.05217v3","created_at":"2026-05-17T23:39:21Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2301.05217","created_at":"2026-05-17T23:39:21Z"},{"alias_kind":"pith_short_12","alias_value":"4GZLISFORWYF","created_at":"2026-05-18T12:33:33Z"},{"alias_kind":"pith_short_16","alias_value":"4GZLISFORWYFPHMH","created_at":"2026-05-18T12:33:33Z"},{"alias_kind":"pith_short_8","alias_value":"4GZLISFO","created_at":"2026-05-18T12:33:33Z"}],"graph_snapshots":[{"event_id":"sha256:c9965912542fd412e48e814ae69f9b328753ea5925d85d1c866df11eff7d1804","target":"graph","created_at":"2026-05-17T23:39:21Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"We fully reverse engineer the algorithm learned by these networks, which uses discrete Fourier transforms and trigonometric identities to convert addition to rotation about a circle. We confirm the algorithm by analyzing the activations and weights and by performing ablations in Fourier space."},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"That the identified Fourier circuit is the dominant mechanism and that ablations in Fourier space fully isolate it without missing other co-occurring computations that could also produce the observed behavior."},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"Grokking arises from gradual amplification of a Fourier-based circuit in the weights followed by removal of memorizing components."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"Transformers on modular addition learn a Fourier rotation algorithm that gradually replaces memorization during training."}],"snapshot_sha256":"f1565aaea603f2b8e3c5053c34253097b56be15b3dce4f00a30faa1a6f4836b7"},"formal_canon":{"evidence_count":3,"snapshot_sha256":"31980ee767ce7529ad6c2d14bea13086d7092d3330d6d0dcde84c0b1b5a67bc6"},"paper":{"abstract_excerpt":"Neural networks often exhibit emergent behavior, where qualitatively new capabilities arise from scaling up the amount of parameters, training data, or training steps. One approach to understanding emergence is to find continuous \\textit{progress measures} that underlie the seemingly discontinuous qualitative changes. We argue that progress measures can be found via mechanistic interpretability: reverse-engineering learned behaviors into their individual components. As a case study, we investigate the recently-discovered phenomenon of ``grokking'' exhibited by small transformers trained on mod","authors_text":"Jacob Steinhardt, Jess Smith, Lawrence Chan, Neel Nanda, Tom Lieberum","cross_cats":["cs.AI"],"headline":"Transformers on modular addition learn a Fourier rotation algorithm that gradually replaces memorization during training.","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2023-01-12T18:56:49Z","title":"Progress measures for grokking via mechanistic interpretability"},"references":{"count":43,"internal_anchors":8,"resolved_work":43,"sample":[{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":1,"title":"More is different for AI , url=","work_id":"b83cee25-39c1-43e8-a9b2-b44d309946a1","year":null},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":3,"title":"OpenAI blog , volume=","work_id":"31dc92c3-2fc3-432b-8d63-b7ee13f53a9c","year":null},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":4,"title":"Advances in neural information processing systems , volume=","work_id":"12f5a236-ef7a-4d13-b4de-b51465a6f977","year":null},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":9,"title":"2022 ACM Conference on Fairness, Accountability, and Transparency , pages=","work_id":"13cfa27e-b611-4974-9c7b-cc1f7aad2bfc","year":2022},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":10,"title":"Beren's Blog - Thoughts on AI, Neuroscience, and other things that interest me","work_id":"8c046681-98d7-4ee8-8d21-c4ab6dd3030c","year":null}],"snapshot_sha256":"a5a08dc1548e305c79acd181969ac690e75c98e831e57bfe9e16026165d8dbe2"},"source":{"id":"2301.05217","kind":"arxiv","version":3},"verdict":{"created_at":"2026-05-14T21:47:29.091836Z","id":"29c7ba9c-7acf-4806-ba94-b355101b2be4","model_set":{"reader":"grok-4.3"},"one_line_summary":"Grokking arises from gradual amplification of a Fourier-based circuit in the weights followed by removal of memorizing components.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"Transformers on modular addition learn a Fourier rotation algorithm that gradually replaces memorization during training.","strongest_claim":"We fully reverse engineer the algorithm learned by these networks, which uses discrete Fourier transforms and trigonometric identities to convert addition to rotation about a circle. We confirm the algorithm by analyzing the activations and weights and by performing ablations in Fourier space.","weakest_assumption":"That the identified Fourier circuit is the dominant mechanism and that ablations in Fourier space fully isolate it without missing other co-occurring computations that could also produce the observed behavior."}},"verdict_id":"29c7ba9c-7acf-4806-ba94-b355101b2be4"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:fd11e88a965ab257c9deedd07c4faeb30bd8103be54040b164dc4164912f6567","target":"record","created_at":"2026-05-17T23:39:21Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"21d2e0a1f4ee7261ee53ba1f358b8ab53995d1926f8e62a563a00647a1530f9c","cross_cats_sorted":["cs.AI"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2023-01-12T18:56:49Z","title_canon_sha256":"2b8da6a68b450756d12923b1c2f434a107b1e93fe063bab567c85bdd1c56c5f9"},"schema_version":"1.0","source":{"id":"2301.05217","kind":"arxiv","version":3}},"canonical_sha256":"e1b2b448ae8db0579d87599bc90dfa7f8b70549e054c6ffc140d0ac4dadecf36","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"e1b2b448ae8db0579d87599bc90dfa7f8b70549e054c6ffc140d0ac4dadecf36","first_computed_at":"2026-05-17T23:39:21.566368Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:39:21.566368Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"zaULf/6h7tzOnYtjRgnZpbgl5sDF/+1oNC78J8yVDW8Os/CBbOuccSizemltAO8enlaUYOOxI8Osn3l4lCooBw==","signature_status":"signed_v1","signed_at":"2026-05-17T23:39:21.567088Z","signed_message":"canonical_sha256_bytes"},"source_id":"2301.05217","source_kind":"arxiv","source_version":3}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:fd11e88a965ab257c9deedd07c4faeb30bd8103be54040b164dc4164912f6567","sha256:c9965912542fd412e48e814ae69f9b328753ea5925d85d1c866df11eff7d1804"],"state_sha256":"698141f5f4936e9bd95aefc6db8231a79bced2b65c2be7ee267cbd8bdc6214ec"}