{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2025:I477SBLGD5WLDCP7DL657LGZJS","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"36c57741ef6dd962bdc9a2890666cd461274c58643108316d8c314a8ded05717","cross_cats_sorted":["cond-mat.dis-nn","cond-mat.stat-mech","cs.LG"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"stat.ML","submitted_at":"2025-05-30T08:18:23Z","title_canon_sha256":"8c3d8cf4b8d5a38546996c381e7822e264f761782815222187c09c67a15e79bb"},"schema_version":"1.0","source":{"id":"2505.24333","kind":"arxiv","version":3}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2505.24333","created_at":"2026-05-21T02:04:48Z"},{"alias_kind":"arxiv_version","alias_value":"2505.24333v3","created_at":"2026-05-21T02:04:48Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2505.24333","created_at":"2026-05-21T02:04:48Z"},{"alias_kind":"pith_short_12","alias_value":"I477SBLGD5WL","created_at":"2026-05-21T02:04:48Z"},{"alias_kind":"pith_short_16","alias_value":"I477SBLGD5WLDCP7","created_at":"2026-05-21T02:04:48Z"},{"alias_kind":"pith_short_8","alias_value":"I477SBLG","created_at":"2026-05-21T02:04:48Z"}],"graph_snapshots":[{"event_id":"sha256:0187ef51d7bb7220ad67676b9e6088cc677714c3d0d10924d5bcb5d94ba541ff","target":"graph","created_at":"2026-05-21T02:04:48Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2505.24333/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Finding the right initialisation for neural networks is crucial to ensure smooth training and good performance. In transformers, the wrong initialisation can lead to one of two failure modes of self-attention layers: rank collapse, where all tokens collapse into similar representations, and entropy collapse, where highly concentrated attention scores lead to training instability. While previous work has studied different scaling regimes for transformers, an asymptotically exact, down-to-the constant prescription for how to initialise transformers has so far been lacking. Here, we provide an an","authors_text":"Alessio Giorlandino, Sebastian Goldt","cross_cats":["cond-mat.dis-nn","cond-mat.stat-mech","cs.LG"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"stat.ML","submitted_at":"2025-05-30T08:18:23Z","title":"Two failure modes of deep transformers and how to avoid them: a unified theory of signal propagation at initialisation"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2505.24333","kind":"arxiv","version":3},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:e987b1cdd1de5f53b731da0f31213d9fd88091cbe62dbf04b65d3b2d7dbfbf9d","target":"record","created_at":"2026-05-21T02:04:48Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"36c57741ef6dd962bdc9a2890666cd461274c58643108316d8c314a8ded05717","cross_cats_sorted":["cond-mat.dis-nn","cond-mat.stat-mech","cs.LG"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"stat.ML","submitted_at":"2025-05-30T08:18:23Z","title_canon_sha256":"8c3d8cf4b8d5a38546996c381e7822e264f761782815222187c09c67a15e79bb"},"schema_version":"1.0","source":{"id":"2505.24333","kind":"arxiv","version":3}},"canonical_sha256":"473ff905661f6cb189ff1afddfacd94c90679239ab50599372629ea24b998857","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"473ff905661f6cb189ff1afddfacd94c90679239ab50599372629ea24b998857","first_computed_at":"2026-05-21T02:04:48.957473Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-21T02:04:48.957473Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"0EVVKSM9I0gaDEOpVkkwyV0Xo6s/ZfSup80K/QpRCMFMk+o6VhJoFCOhzME5inZ4iRI6C+AjjoO8y0TmYkCnBA==","signature_status":"signed_v1","signed_at":"2026-05-21T02:04:48.958433Z","signed_message":"canonical_sha256_bytes"},"source_id":"2505.24333","source_kind":"arxiv","source_version":3}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:e987b1cdd1de5f53b731da0f31213d9fd88091cbe62dbf04b65d3b2d7dbfbf9d","sha256:0187ef51d7bb7220ad67676b9e6088cc677714c3d0d10924d5bcb5d94ba541ff"],"state_sha256":"327b89f210177ae27ea388c4af37037a7ed35adaae6cb28f260243b34e296951"}