{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:QVL7Y3HRHQ6NMD3QS653LTLBEG","short_pith_number":"pith:QVL7Y3HR","canonical_record":{"source":{"id":"2605.07333","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-05-08T06:37:51Z","cross_cats_sorted":[],"title_canon_sha256":"f4cd6c6fff14e397f96992fe72b8137ed2309dbc9ab7077ac3511ebd71d95017","abstract_canon_sha256":"ef20c4785bfd3d0c313a012adf5467b17f16589b980a02ddc3620d5999e4a6c6"},"schema_version":"1.0"},"canonical_sha256":"8557fc6cf13c3cd60f7097bbb5cd612188fd06e6c8d80549a64b31ee0c92bf21","source":{"kind":"arxiv","id":"2605.07333","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.07333","created_at":"2026-05-20T00:03:14Z"},{"alias_kind":"arxiv_version","alias_value":"2605.07333v2","created_at":"2026-05-20T00:03:14Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.07333","created_at":"2026-05-20T00:03:14Z"},{"alias_kind":"pith_short_12","alias_value":"QVL7Y3HRHQ6N","created_at":"2026-05-20T00:03:14Z"},{"alias_kind":"pith_short_16","alias_value":"QVL7Y3HRHQ6NMD3Q","created_at":"2026-05-20T00:03:14Z"},{"alias_kind":"pith_short_8","alias_value":"QVL7Y3HR","created_at":"2026-05-20T00:03:14Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:QVL7Y3HRHQ6NMD3QS653LTLBEG","target":"record","payload":{"canonical_record":{"source":{"id":"2605.07333","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-05-08T06:37:51Z","cross_cats_sorted":[],"title_canon_sha256":"f4cd6c6fff14e397f96992fe72b8137ed2309dbc9ab7077ac3511ebd71d95017","abstract_canon_sha256":"ef20c4785bfd3d0c313a012adf5467b17f16589b980a02ddc3620d5999e4a6c6"},"schema_version":"1.0"},"canonical_sha256":"8557fc6cf13c3cd60f7097bbb5cd612188fd06e6c8d80549a64b31ee0c92bf21","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-20T00:03:14.783834Z","signature_b64":"RaRwx4MTi+Y14xFvKlGyx7tfKrGzB8CrhmTZd6V3k/15XW0fjXokx+pVKEaqHqYr+soMekweoPi3Ua3nS2riDg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"8557fc6cf13c3cd60f7097bbb5cd612188fd06e6c8d80549a64b31ee0c92bf21","last_reissued_at":"2026-05-20T00:03:14.782992Z","signature_status":"signed_v1","first_computed_at":"2026-05-20T00:03:14.782992Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2605.07333","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-20T00:03:14Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"QXio2WdjmERYRV0SU+sfhgvGVhrFlBT0v/UNTCwchwtHqYD1hm9bFu4d22mZQrMZumxiZkrrRDC4GkvjIZvFBw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-29T19:26:39.969668Z"},"content_sha256":"10384d936afc4b3b9c2d4d8ba939a6ae2693e73c22b4f30dc6edf20b169a3b9d","schema_version":"1.0","event_id":"sha256:10384d936afc4b3b9c2d4d8ba939a6ae2693e73c22b4f30dc6edf20b169a3b9d"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:QVL7Y3HRHQ6NMD3QS653LTLBEG","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Beyond Linear Attention: Softmax Transformers Implement In-Context Reinforcement Learning","license":"http://creativecommons.org/licenses/by/4.0/","headline":"Softmax attention in Transformers computes iterative updates of a weighted softmax TD learning algorithm across layers.","cross_cats":[],"primary_cat":"cs.LG","authors_text":"Claire Chen, Rohan Chandra, Shangtong Zhang, Shuze Daniel Liu, Xinyu Liu, Zixuan Xie","submitted_at":"2026-05-08T06:37:51Z","abstract_excerpt":"In-context reinforcement learning (ICRL) studies agents that, after pretraining, adapt to new tasks by conditioning on additional context without parameter updates. Existing theoretical analyses of ICRL largely rely on linear attention, which replaces the softmax function in the standard attention with an identity mapping. This paper provides the first theoretical understanding of ICRL without making the unrealistic linear attention simplification. In particular, we consider the standard softmax attention used in practice. We show that, with certain parameters, the layerwise forward pass of a "},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"with certain parameters, the layerwise forward pass of a Transformer with such softmax attention is equivalent to iterative updates of a weighted softmax temporal difference (TD) learning algorithm.","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"The existence of specific parameters that simultaneously achieve the forward-pass equivalence, satisfy the contraction condition for error decay, and globally minimize the pretraining loss.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"Softmax Transformers with specific parameters implement iterative weighted softmax TD learning for in-context policy evaluation, with evaluation error decaying over layers and those parameters globally minimizing pretraining loss.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"Softmax attention in Transformers computes iterative updates of a weighted softmax TD learning algorithm across layers.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"7864c97d0132d6c60ead6119cf947fab0d1fa85118d891d506e9118f827fad4b"},"source":{"id":"2605.07333","kind":"arxiv","version":2},"verdict":{"id":"9fcc8599-80aa-4c09-84b4-08105701a364","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-11T01:09:34.720802Z","strongest_claim":"with certain parameters, the layerwise forward pass of a Transformer with such softmax attention is equivalent to iterative updates of a weighted softmax temporal difference (TD) learning algorithm.","one_line_summary":"Softmax Transformers with specific parameters implement iterative weighted softmax TD learning for in-context policy evaluation, with evaluation error decaying over layers and those parameters globally minimizing pretraining loss.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"The existence of specific parameters that simultaneously achieve the forward-pass equivalence, satisfy the contraction condition for error decay, and globally minimize the pretraining loss.","pith_extraction_headline":"Softmax attention in Transformers computes iterative updates of a weighted softmax TD learning algorithm across layers."},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2605.07333/integrity.json","findings":[],"available":true,"detectors_run":[{"name":"doi_title_agreement","ran_at":"2026-05-19T17:01:18.756730Z","status":"completed","version":"1.0.0","findings_count":0},{"name":"doi_compliance","ran_at":"2026-05-19T11:53:37.487534Z","status":"completed","version":"1.0.0","findings_count":0}],"snapshot_sha256":"b0889f43668b839d2e9d9a68469d06a80be4aeb5bd1060694e8750b465b1e352"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":1,"snapshot_sha256":"7294b1e234293f616d18d2af1f7f60e861767468fa237dec641333eb5394c133"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":"9fcc8599-80aa-4c09-84b4-08105701a364"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-20T00:03:14Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"gmhXZHitgH/tKfnehGVhQ7BfLUqv3HCH8eofOSTULysmLJsh3/sDvud6d5eatbGEeuE54qmLac1qPgXeDSalBA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-29T19:26:39.970560Z"},"content_sha256":"00e8ae69e72c66b446bceba0b2a766930438ed1bf9dd4aba309edcd4e5cd7ee5","schema_version":"1.0","event_id":"sha256:00e8ae69e72c66b446bceba0b2a766930438ed1bf9dd4aba309edcd4e5cd7ee5"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/QVL7Y3HRHQ6NMD3QS653LTLBEG/bundle.json","state_url":"https://pith.science/pith/QVL7Y3HRHQ6NMD3QS653LTLBEG/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/QVL7Y3HRHQ6NMD3QS653LTLBEG/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-29T19:26:39Z","links":{"resolver":"https://pith.science/pith/QVL7Y3HRHQ6NMD3QS653LTLBEG","bundle":"https://pith.science/pith/QVL7Y3HRHQ6NMD3QS653LTLBEG/bundle.json","state":"https://pith.science/pith/QVL7Y3HRHQ6NMD3QS653LTLBEG/state.json","well_known_bundle":"https://pith.science/.well-known/pith/QVL7Y3HRHQ6NMD3QS653LTLBEG/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:QVL7Y3HRHQ6NMD3QS653LTLBEG","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"ef20c4785bfd3d0c313a012adf5467b17f16589b980a02ddc3620d5999e4a6c6","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-05-08T06:37:51Z","title_canon_sha256":"f4cd6c6fff14e397f96992fe72b8137ed2309dbc9ab7077ac3511ebd71d95017"},"schema_version":"1.0","source":{"id":"2605.07333","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.07333","created_at":"2026-05-20T00:03:14Z"},{"alias_kind":"arxiv_version","alias_value":"2605.07333v2","created_at":"2026-05-20T00:03:14Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.07333","created_at":"2026-05-20T00:03:14Z"},{"alias_kind":"pith_short_12","alias_value":"QVL7Y3HRHQ6N","created_at":"2026-05-20T00:03:14Z"},{"alias_kind":"pith_short_16","alias_value":"QVL7Y3HRHQ6NMD3Q","created_at":"2026-05-20T00:03:14Z"},{"alias_kind":"pith_short_8","alias_value":"QVL7Y3HR","created_at":"2026-05-20T00:03:14Z"}],"graph_snapshots":[{"event_id":"sha256:00e8ae69e72c66b446bceba0b2a766930438ed1bf9dd4aba309edcd4e5cd7ee5","target":"graph","created_at":"2026-05-20T00:03:14Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"with certain parameters, the layerwise forward pass of a Transformer with such softmax attention is equivalent to iterative updates of a weighted softmax temporal difference (TD) learning algorithm."},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"The existence of specific parameters that simultaneously achieve the forward-pass equivalence, satisfy the contraction condition for error decay, and globally minimize the pretraining loss."},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"Softmax Transformers with specific parameters implement iterative weighted softmax TD learning for in-context policy evaluation, with evaluation error decaying over layers and those parameters globally minimizing pretraining loss."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"Softmax attention in Transformers computes iterative updates of a weighted softmax TD learning algorithm across layers."}],"snapshot_sha256":"7864c97d0132d6c60ead6119cf947fab0d1fa85118d891d506e9118f827fad4b"},"formal_canon":{"evidence_count":1,"snapshot_sha256":"7294b1e234293f616d18d2af1f7f60e861767468fa237dec641333eb5394c133"},"integrity":{"available":true,"clean":true,"detectors_run":[{"findings_count":0,"name":"doi_title_agreement","ran_at":"2026-05-19T17:01:18.756730Z","status":"completed","version":"1.0.0"},{"findings_count":0,"name":"doi_compliance","ran_at":"2026-05-19T11:53:37.487534Z","status":"completed","version":"1.0.0"}],"endpoint":"/pith/2605.07333/integrity.json","findings":[],"snapshot_sha256":"b0889f43668b839d2e9d9a68469d06a80be4aeb5bd1060694e8750b465b1e352","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"In-context reinforcement learning (ICRL) studies agents that, after pretraining, adapt to new tasks by conditioning on additional context without parameter updates. Existing theoretical analyses of ICRL largely rely on linear attention, which replaces the softmax function in the standard attention with an identity mapping. This paper provides the first theoretical understanding of ICRL without making the unrealistic linear attention simplification. In particular, we consider the standard softmax attention used in practice. We show that, with certain parameters, the layerwise forward pass of a ","authors_text":"Claire Chen, Rohan Chandra, Shangtong Zhang, Shuze Daniel Liu, Xinyu Liu, Zixuan Xie","cross_cats":[],"headline":"Softmax attention in Transformers computes iterative updates of a weighted softmax TD learning algorithm across layers.","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-05-08T06:37:51Z","title":"Beyond Linear Attention: Softmax Transformers Implement In-Context Reinforcement Learning"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.07333","kind":"arxiv","version":2},"verdict":{"created_at":"2026-05-11T01:09:34.720802Z","id":"9fcc8599-80aa-4c09-84b4-08105701a364","model_set":{"reader":"grok-4.3"},"one_line_summary":"Softmax Transformers with specific parameters implement iterative weighted softmax TD learning for in-context policy evaluation, with evaluation error decaying over layers and those parameters globally minimizing pretraining loss.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"Softmax attention in Transformers computes iterative updates of a weighted softmax TD learning algorithm across layers.","strongest_claim":"with certain parameters, the layerwise forward pass of a Transformer with such softmax attention is equivalent to iterative updates of a weighted softmax temporal difference (TD) learning algorithm.","weakest_assumption":"The existence of specific parameters that simultaneously achieve the forward-pass equivalence, satisfy the contraction condition for error decay, and globally minimize the pretraining loss."}},"verdict_id":"9fcc8599-80aa-4c09-84b4-08105701a364"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:10384d936afc4b3b9c2d4d8ba939a6ae2693e73c22b4f30dc6edf20b169a3b9d","target":"record","created_at":"2026-05-20T00:03:14Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"ef20c4785bfd3d0c313a012adf5467b17f16589b980a02ddc3620d5999e4a6c6","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-05-08T06:37:51Z","title_canon_sha256":"f4cd6c6fff14e397f96992fe72b8137ed2309dbc9ab7077ac3511ebd71d95017"},"schema_version":"1.0","source":{"id":"2605.07333","kind":"arxiv","version":2}},"canonical_sha256":"8557fc6cf13c3cd60f7097bbb5cd612188fd06e6c8d80549a64b31ee0c92bf21","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"8557fc6cf13c3cd60f7097bbb5cd612188fd06e6c8d80549a64b31ee0c92bf21","first_computed_at":"2026-05-20T00:03:14.782992Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-20T00:03:14.782992Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"RaRwx4MTi+Y14xFvKlGyx7tfKrGzB8CrhmTZd6V3k/15XW0fjXokx+pVKEaqHqYr+soMekweoPi3Ua3nS2riDg==","signature_status":"signed_v1","signed_at":"2026-05-20T00:03:14.783834Z","signed_message":"canonical_sha256_bytes"},"source_id":"2605.07333","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:10384d936afc4b3b9c2d4d8ba939a6ae2693e73c22b4f30dc6edf20b169a3b9d","sha256:00e8ae69e72c66b446bceba0b2a766930438ed1bf9dd4aba309edcd4e5cd7ee5"],"state_sha256":"8320a6d56f3e9715901eac72bee78f795c9e5f1b0ca5e6a9340930cf21bdbbdc"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"gOX/RkUF9iI6WDrsV/ZDk2PGdLMqRDjGjQWgUm+iK8gos55UaKWepDgAylKQblD84bqtOYwsFiLdY6kEEUAzAA==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-29T19:26:39.974590Z","bundle_sha256":"733492b249b14276e1cb59fa78de8b89776ba5ebd3a8f91f91e880a5100c83d4"}}