{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:WJHRH7QFA6GE52BWFTCNWTXIPA","short_pith_number":"pith:WJHRH7QF","schema_version":"1.0","canonical_sha256":"b24f13fe05078c4ee8362cc4db4ee87818f9b3154c2838b455ed9a0d178206e3","source":{"kind":"arxiv","id":"2606.18465","version":1},"attestation_state":"computed","paper":{"title":"What Does the Weight Norm Control in Grokking? Logit-Scale Mediation under Cross-Entropy","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.LG","authors_text":"Truong Xuan Khanh","submitted_at":"2026-06-16T20:16:18Z","abstract_excerpt":"Grokking, the delayed jump from memorization to generalization, is usually tied to the weight norm: a smaller norm generalizes sooner. We ask what the norm actually controls. Holding the weight norm fixed by clamping and varying only an output temperature, we slide the grokking delay across its entire norm-induced range under cross-entropy; matching the effective logit scale back to baseline recovers about 85% of the delay at two moduli. Across a grid of norms and temperatures the delay collapses onto the logit scale alone (R2 = 0.97), with the norm adding 1-2% beyond it. The effect is loss-de"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2606.18465","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2026-06-16T20:16:18Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"50cff2db73d1cd7ca7036cdc09b5064b636d46e8fd42b8cad354352f057186e8","abstract_canon_sha256":"fd68b770ba076bb73819c75dbed8cdca36f7b492d2eacebd38a8a6dd66270db0"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-19T16:11:01.870015Z","signature_b64":"D2puyzmFvhLGYDuPazMine7D73+kcDL9VIvWM+RZH7HL6GTpM0rfJ2YtaWKD7oWEm8/xLehFeaI8ZoYv2yzZBA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"b24f13fe05078c4ee8362cc4db4ee87818f9b3154c2838b455ed9a0d178206e3","last_reissued_at":"2026-06-19T16:11:01.869617Z","signature_status":"signed_v1","first_computed_at":"2026-06-19T16:11:01.869617Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"What Does the Weight Norm Control in Grokking? Logit-Scale Mediation under Cross-Entropy","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.LG","authors_text":"Truong Xuan Khanh","submitted_at":"2026-06-16T20:16:18Z","abstract_excerpt":"Grokking, the delayed jump from memorization to generalization, is usually tied to the weight norm: a smaller norm generalizes sooner. We ask what the norm actually controls. Holding the weight norm fixed by clamping and varying only an output temperature, we slide the grokking delay across its entire norm-induced range under cross-entropy; matching the effective logit scale back to baseline recovers about 85% of the delay at two moduli. Across a grid of norms and temperatures the delay collapses onto the logit scale alone (R2 = 0.97), with the norm adding 1-2% beyond it. The effect is loss-de"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.18465","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2606.18465/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2606.18465","created_at":"2026-06-19T16:11:01.869676+00:00"},{"alias_kind":"arxiv_version","alias_value":"2606.18465v1","created_at":"2026-06-19T16:11:01.869676+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.18465","created_at":"2026-06-19T16:11:01.869676+00:00"},{"alias_kind":"pith_short_12","alias_value":"WJHRH7QFA6GE","created_at":"2026-06-19T16:11:01.869676+00:00"},{"alias_kind":"pith_short_16","alias_value":"WJHRH7QFA6GE52BW","created_at":"2026-06-19T16:11:01.869676+00:00"},{"alias_kind":"pith_short_8","alias_value":"WJHRH7QF","created_at":"2026-06-19T16:11:01.869676+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/WJHRH7QFA6GE52BWFTCNWTXIPA","json":"https://pith.science/pith/WJHRH7QFA6GE52BWFTCNWTXIPA.json","graph_json":"https://pith.science/api/pith-number/WJHRH7QFA6GE52BWFTCNWTXIPA/graph.json","events_json":"https://pith.science/api/pith-number/WJHRH7QFA6GE52BWFTCNWTXIPA/events.json","paper":"https://pith.science/paper/WJHRH7QF"},"agent_actions":{"view_html":"https://pith.science/pith/WJHRH7QFA6GE52BWFTCNWTXIPA","download_json":"https://pith.science/pith/WJHRH7QFA6GE52BWFTCNWTXIPA.json","view_paper":"https://pith.science/paper/WJHRH7QF","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2606.18465&json=true","fetch_graph":"https://pith.science/api/pith-number/WJHRH7QFA6GE52BWFTCNWTXIPA/graph.json","fetch_events":"https://pith.science/api/pith-number/WJHRH7QFA6GE52BWFTCNWTXIPA/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/WJHRH7QFA6GE52BWFTCNWTXIPA/action/timestamp_anchor","attest_storage":"https://pith.science/pith/WJHRH7QFA6GE52BWFTCNWTXIPA/action/storage_attestation","attest_author":"https://pith.science/pith/WJHRH7QFA6GE52BWFTCNWTXIPA/action/author_attestation","sign_citation":"https://pith.science/pith/WJHRH7QFA6GE52BWFTCNWTXIPA/action/citation_signature","submit_replication":"https://pith.science/pith/WJHRH7QFA6GE52BWFTCNWTXIPA/action/replication_record"}},"created_at":"2026-06-19T16:11:01.869676+00:00","updated_at":"2026-06-19T16:11:01.869676+00:00"}