{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:QTH2KVGY745HDG32IBOMWYNYQ3","short_pith_number":"pith:QTH2KVGY","schema_version":"1.0","canonical_sha256":"84cfa554d8ff3a719b7a405ccb61b886cb13fcb04d81955a22f436d1e817b8ed","source":{"kind":"arxiv","id":"2606.25010","version":1},"attestation_state":"computed","paper":{"title":"Emergent Capabilities Arise Randomly from Learning Sparse Attention Patterns","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.CL"],"primary_cat":"cs.LG","authors_text":"Andrew Gordon Wilson, Pavel Izmailov, Shikai Qiu, Vatsal Baherwani, Zixi Chen","submitted_at":"2026-06-23T17:51:10Z","abstract_excerpt":"Neural scaling laws for transformer language models predict smooth improvements in pretraining loss with increasing parameters, but downstream capabilities such as in-context learning are known to emerge abruptly past a certain model scale. In this paper, we show that emergent capabilities arise stochastically throughout training, with larger models acquiring them earlier on average. We demonstrate that the emergence of capabilities such as pattern completion and indirect object identification corresponds to the abrupt learning of task-relevant attention patterns. To isolate this phenomenon, w"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2606.25010","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-06-23T17:51:10Z","cross_cats_sorted":["cs.CL"],"title_canon_sha256":"acf72bb68b29db55d9ddc505426e2e44dde950f521c14d73d9b4c3c09ba3745f","abstract_canon_sha256":"0f0fb898cae6f04c857e78c8b60bee12f26fdbb7f48e1af9eb349f66c5c47c62"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-25T00:18:14.615630Z","signature_b64":"q9Y5CdvpT7XRsYio6MOAzQd17E18o82d+MfzCki1bPawgM8ccJQhEdNkq3/+ng2DDWKXNXMrHtlvP2WxIWe1Ag==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"84cfa554d8ff3a719b7a405ccb61b886cb13fcb04d81955a22f436d1e817b8ed","last_reissued_at":"2026-06-25T00:18:14.615145Z","signature_status":"signed_v1","first_computed_at":"2026-06-25T00:18:14.615145Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Emergent Capabilities Arise Randomly from Learning Sparse Attention Patterns","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.CL"],"primary_cat":"cs.LG","authors_text":"Andrew Gordon Wilson, Pavel Izmailov, Shikai Qiu, Vatsal Baherwani, Zixi Chen","submitted_at":"2026-06-23T17:51:10Z","abstract_excerpt":"Neural scaling laws for transformer language models predict smooth improvements in pretraining loss with increasing parameters, but downstream capabilities such as in-context learning are known to emerge abruptly past a certain model scale. In this paper, we show that emergent capabilities arise stochastically throughout training, with larger models acquiring them earlier on average. We demonstrate that the emergence of capabilities such as pattern completion and indirect object identification corresponds to the abrupt learning of task-relevant attention patterns. To isolate this phenomenon, w"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.25010","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2606.25010/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2606.25010","created_at":"2026-06-25T00:18:14.615204+00:00"},{"alias_kind":"arxiv_version","alias_value":"2606.25010v1","created_at":"2026-06-25T00:18:14.615204+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.25010","created_at":"2026-06-25T00:18:14.615204+00:00"},{"alias_kind":"pith_short_12","alias_value":"QTH2KVGY745H","created_at":"2026-06-25T00:18:14.615204+00:00"},{"alias_kind":"pith_short_16","alias_value":"QTH2KVGY745HDG32","created_at":"2026-06-25T00:18:14.615204+00:00"},{"alias_kind":"pith_short_8","alias_value":"QTH2KVGY","created_at":"2026-06-25T00:18:14.615204+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/QTH2KVGY745HDG32IBOMWYNYQ3","json":"https://pith.science/pith/QTH2KVGY745HDG32IBOMWYNYQ3.json","graph_json":"https://pith.science/api/pith-number/QTH2KVGY745HDG32IBOMWYNYQ3/graph.json","events_json":"https://pith.science/api/pith-number/QTH2KVGY745HDG32IBOMWYNYQ3/events.json","paper":"https://pith.science/paper/QTH2KVGY"},"agent_actions":{"view_html":"https://pith.science/pith/QTH2KVGY745HDG32IBOMWYNYQ3","download_json":"https://pith.science/pith/QTH2KVGY745HDG32IBOMWYNYQ3.json","view_paper":"https://pith.science/paper/QTH2KVGY","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2606.25010&json=true","fetch_graph":"https://pith.science/api/pith-number/QTH2KVGY745HDG32IBOMWYNYQ3/graph.json","fetch_events":"https://pith.science/api/pith-number/QTH2KVGY745HDG32IBOMWYNYQ3/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/QTH2KVGY745HDG32IBOMWYNYQ3/action/timestamp_anchor","attest_storage":"https://pith.science/pith/QTH2KVGY745HDG32IBOMWYNYQ3/action/storage_attestation","attest_author":"https://pith.science/pith/QTH2KVGY745HDG32IBOMWYNYQ3/action/author_attestation","sign_citation":"https://pith.science/pith/QTH2KVGY745HDG32IBOMWYNYQ3/action/citation_signature","submit_replication":"https://pith.science/pith/QTH2KVGY745HDG32IBOMWYNYQ3/action/replication_record"}},"created_at":"2026-06-25T00:18:14.615204+00:00","updated_at":"2026-06-25T00:18:14.615204+00:00"}