{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:CP2AQAENCJKOBCEIK7T72NCQ2V","short_pith_number":"pith:CP2AQAEN","canonical_record":{"source":{"id":"2605.08475","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2026-05-08T20:50:58Z","cross_cats_sorted":["cs.AI","cs.NA","math.NA","math.OC"],"title_canon_sha256":"5e714626cc7f3312c7528de9b6aa3936c5b20016ba954268020980a28c7e4415","abstract_canon_sha256":"edffada1f687293db60b61f9efe64d358fab6c4671e670e3721f8f888fcbeb84"},"schema_version":"1.0"},"canonical_sha256":"13f408008d1254e0888857e7fd3450d54638ecf51340700a56044a7e0cd8216e","source":{"kind":"arxiv","id":"2605.08475","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.08475","created_at":"2026-05-20T00:02:12Z"},{"alias_kind":"arxiv_version","alias_value":"2605.08475v2","created_at":"2026-05-20T00:02:12Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.08475","created_at":"2026-05-20T00:02:12Z"},{"alias_kind":"pith_short_12","alias_value":"CP2AQAENCJKO","created_at":"2026-05-20T00:02:12Z"},{"alias_kind":"pith_short_16","alias_value":"CP2AQAENCJKOBCEI","created_at":"2026-05-20T00:02:12Z"},{"alias_kind":"pith_short_8","alias_value":"CP2AQAEN","created_at":"2026-05-20T00:02:12Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:CP2AQAENCJKOBCEIK7T72NCQ2V","target":"record","payload":{"canonical_record":{"source":{"id":"2605.08475","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2026-05-08T20:50:58Z","cross_cats_sorted":["cs.AI","cs.NA","math.NA","math.OC"],"title_canon_sha256":"5e714626cc7f3312c7528de9b6aa3936c5b20016ba954268020980a28c7e4415","abstract_canon_sha256":"edffada1f687293db60b61f9efe64d358fab6c4671e670e3721f8f888fcbeb84"},"schema_version":"1.0"},"canonical_sha256":"13f408008d1254e0888857e7fd3450d54638ecf51340700a56044a7e0cd8216e","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-20T00:02:12.653837Z","signature_b64":"SoH2Tckx2MpeMiciPDUaraZhzriMPgystFwOwNvtQRFnJAbI2BD8kHfVt98ten3haLPH+7fUzvQ/wHG1+MvQAQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"13f408008d1254e0888857e7fd3450d54638ecf51340700a56044a7e0cd8216e","last_reissued_at":"2026-05-20T00:02:12.653063Z","signature_status":"signed_v1","first_computed_at":"2026-05-20T00:02:12.653063Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2605.08475","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-20T00:02:12Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"/HAdNO7LBtTw8zwSlUsJP6sdQxb7IEctPZC7SzIxraJo9JtugAi4lCTvwFpFVsrBA6pAeqjlXmFRxeCAkgrqAg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-28T03:33:34.560096Z"},"content_sha256":"86f75d5dca19b58d4ad710acc44b3c6647e9b91f117919e3d0d6e4c68a8321eb","schema_version":"1.0","event_id":"sha256:86f75d5dca19b58d4ad710acc44b3c6647e9b91f117919e3d0d6e4c68a8321eb"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:CP2AQAENCJKOBCEIK7T72NCQ2V","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Transformers Can Implement Preconditioned Richardson Iteration for In-Context Gaussian Kernel Regression","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"A standard softmax-attention transformer can run preconditioned Richardson iteration to approximate in-context Gaussian kernel ridge regression.","cross_cats":["cs.AI","cs.NA","math.NA","math.OC"],"primary_cat":"cs.LG","authors_text":"Charles Kulick, Dongyang Li, Mingsong Yan, Sui Tang","submitted_at":"2026-05-08T20:50:58Z","abstract_excerpt":"Mechanistic accounts of in-context learning (ICL) have identified iterative algorithms for linear regression and related linear prediction tasks, often using linear or ReLU attention variants. For nonlinear ICL, prior work has related softmax and kernelized attention to functional-gradient-type dynamics, but it remains unclear whether a standard transformer with softmax attention can implement a convergent solver with an end-to-end prediction-error guarantee. In this paper, we study in-context kernel ridge regression (KRR) with Gaussian kernels and show that a standard softmax-attention transf"},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"a standard softmax-attention transformer can approximate the KRR predictor during its forward pass by implementing preconditioned Richardson iteration on the associated kernel linear system. Under bounded-data assumptions, we construct a single-head transformer with O(log(1/ε)) blocks and MLP width O(√(N/ε)) that achieves ε-accurate prediction for prompts of length N.","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"Under bounded-data assumptions we construct... (the bounded-data assumptions that enable the approximation guarantee and error bounds for the transformer implementation).","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"Standard softmax-attention transformers can approximate the Gaussian kernel ridge regression predictor by implementing preconditioned Richardson iteration during their forward pass.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"A standard softmax-attention transformer can run preconditioned Richardson iteration to approximate in-context Gaussian kernel ridge regression.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"9a334547fd42db778f977935d5978a11078f5edc7c62d3e3d3c7d0940f8a904e"},"source":{"id":"2605.08475","kind":"arxiv","version":2},"verdict":{"id":"ac4dfcd4-17ed-4631-ae42-1c3ddf7c040e","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-12T03:02:41.829370Z","strongest_claim":"a standard softmax-attention transformer can approximate the KRR predictor during its forward pass by implementing preconditioned Richardson iteration on the associated kernel linear system. Under bounded-data assumptions, we construct a single-head transformer with O(log(1/ε)) blocks and MLP width O(√(N/ε)) that achieves ε-accurate prediction for prompts of length N.","one_line_summary":"Standard softmax-attention transformers can approximate the Gaussian kernel ridge regression predictor by implementing preconditioned Richardson iteration during their forward pass.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"Under bounded-data assumptions we construct... (the bounded-data assumptions that enable the approximation guarantee and error bounds for the transformer implementation).","pith_extraction_headline":"A standard softmax-attention transformer can run preconditioned Richardson iteration to approximate in-context Gaussian kernel ridge regression."},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2605.08475/integrity.json","findings":[],"available":true,"detectors_run":[{"name":"doi_title_agreement","ran_at":"2026-05-19T15:01:17.746036Z","status":"completed","version":"1.0.0","findings_count":0},{"name":"doi_compliance","ran_at":"2026-05-19T11:04:15.358228Z","status":"completed","version":"1.0.0","findings_count":0}],"snapshot_sha256":"f14ea48e78476745d413e9171f77020c2bf311caecd8dc9d0e2ceabd9513dcd1"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":"ac4dfcd4-17ed-4631-ae42-1c3ddf7c040e"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-20T00:02:12Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"n4NifQL/VR/RlQGm99AaRyICnYfD3EPXxgANzFbLjhzfzMzTifDRQ7Rjy/+8SwQnlh7MPMBZgPL9owpyCTjsCA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-28T03:33:34.561101Z"},"content_sha256":"aa41ef73056b9e2968e8a17f8dbde6aec8072702ab55e8d3449d803b48def814","schema_version":"1.0","event_id":"sha256:aa41ef73056b9e2968e8a17f8dbde6aec8072702ab55e8d3449d803b48def814"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/CP2AQAENCJKOBCEIK7T72NCQ2V/bundle.json","state_url":"https://pith.science/pith/CP2AQAENCJKOBCEIK7T72NCQ2V/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/CP2AQAENCJKOBCEIK7T72NCQ2V/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-28T03:33:34Z","links":{"resolver":"https://pith.science/pith/CP2AQAENCJKOBCEIK7T72NCQ2V","bundle":"https://pith.science/pith/CP2AQAENCJKOBCEIK7T72NCQ2V/bundle.json","state":"https://pith.science/pith/CP2AQAENCJKOBCEIK7T72NCQ2V/state.json","well_known_bundle":"https://pith.science/.well-known/pith/CP2AQAENCJKOBCEIK7T72NCQ2V/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:CP2AQAENCJKOBCEIK7T72NCQ2V","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"edffada1f687293db60b61f9efe64d358fab6c4671e670e3721f8f888fcbeb84","cross_cats_sorted":["cs.AI","cs.NA","math.NA","math.OC"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2026-05-08T20:50:58Z","title_canon_sha256":"5e714626cc7f3312c7528de9b6aa3936c5b20016ba954268020980a28c7e4415"},"schema_version":"1.0","source":{"id":"2605.08475","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.08475","created_at":"2026-05-20T00:02:12Z"},{"alias_kind":"arxiv_version","alias_value":"2605.08475v2","created_at":"2026-05-20T00:02:12Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.08475","created_at":"2026-05-20T00:02:12Z"},{"alias_kind":"pith_short_12","alias_value":"CP2AQAENCJKO","created_at":"2026-05-20T00:02:12Z"},{"alias_kind":"pith_short_16","alias_value":"CP2AQAENCJKOBCEI","created_at":"2026-05-20T00:02:12Z"},{"alias_kind":"pith_short_8","alias_value":"CP2AQAEN","created_at":"2026-05-20T00:02:12Z"}],"graph_snapshots":[{"event_id":"sha256:aa41ef73056b9e2968e8a17f8dbde6aec8072702ab55e8d3449d803b48def814","target":"graph","created_at":"2026-05-20T00:02:12Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"a standard softmax-attention transformer can approximate the KRR predictor during its forward pass by implementing preconditioned Richardson iteration on the associated kernel linear system. Under bounded-data assumptions, we construct a single-head transformer with O(log(1/ε)) blocks and MLP width O(√(N/ε)) that achieves ε-accurate prediction for prompts of length N."},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"Under bounded-data assumptions we construct... (the bounded-data assumptions that enable the approximation guarantee and error bounds for the transformer implementation)."},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"Standard softmax-attention transformers can approximate the Gaussian kernel ridge regression predictor by implementing preconditioned Richardson iteration during their forward pass."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"A standard softmax-attention transformer can run preconditioned Richardson iteration to approximate in-context Gaussian kernel ridge regression."}],"snapshot_sha256":"9a334547fd42db778f977935d5978a11078f5edc7c62d3e3d3c7d0940f8a904e"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[{"findings_count":0,"name":"doi_title_agreement","ran_at":"2026-05-19T15:01:17.746036Z","status":"completed","version":"1.0.0"},{"findings_count":0,"name":"doi_compliance","ran_at":"2026-05-19T11:04:15.358228Z","status":"completed","version":"1.0.0"}],"endpoint":"/pith/2605.08475/integrity.json","findings":[],"snapshot_sha256":"f14ea48e78476745d413e9171f77020c2bf311caecd8dc9d0e2ceabd9513dcd1","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Mechanistic accounts of in-context learning (ICL) have identified iterative algorithms for linear regression and related linear prediction tasks, often using linear or ReLU attention variants. For nonlinear ICL, prior work has related softmax and kernelized attention to functional-gradient-type dynamics, but it remains unclear whether a standard transformer with softmax attention can implement a convergent solver with an end-to-end prediction-error guarantee. In this paper, we study in-context kernel ridge regression (KRR) with Gaussian kernels and show that a standard softmax-attention transf","authors_text":"Charles Kulick, Dongyang Li, Mingsong Yan, Sui Tang","cross_cats":["cs.AI","cs.NA","math.NA","math.OC"],"headline":"A standard softmax-attention transformer can run preconditioned Richardson iteration to approximate in-context Gaussian kernel ridge regression.","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2026-05-08T20:50:58Z","title":"Transformers Can Implement Preconditioned Richardson Iteration for In-Context Gaussian Kernel Regression"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.08475","kind":"arxiv","version":2},"verdict":{"created_at":"2026-05-12T03:02:41.829370Z","id":"ac4dfcd4-17ed-4631-ae42-1c3ddf7c040e","model_set":{"reader":"grok-4.3"},"one_line_summary":"Standard softmax-attention transformers can approximate the Gaussian kernel ridge regression predictor by implementing preconditioned Richardson iteration during their forward pass.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"A standard softmax-attention transformer can run preconditioned Richardson iteration to approximate in-context Gaussian kernel ridge regression.","strongest_claim":"a standard softmax-attention transformer can approximate the KRR predictor during its forward pass by implementing preconditioned Richardson iteration on the associated kernel linear system. Under bounded-data assumptions, we construct a single-head transformer with O(log(1/ε)) blocks and MLP width O(√(N/ε)) that achieves ε-accurate prediction for prompts of length N.","weakest_assumption":"Under bounded-data assumptions we construct... (the bounded-data assumptions that enable the approximation guarantee and error bounds for the transformer implementation)."}},"verdict_id":"ac4dfcd4-17ed-4631-ae42-1c3ddf7c040e"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:86f75d5dca19b58d4ad710acc44b3c6647e9b91f117919e3d0d6e4c68a8321eb","target":"record","created_at":"2026-05-20T00:02:12Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"edffada1f687293db60b61f9efe64d358fab6c4671e670e3721f8f888fcbeb84","cross_cats_sorted":["cs.AI","cs.NA","math.NA","math.OC"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2026-05-08T20:50:58Z","title_canon_sha256":"5e714626cc7f3312c7528de9b6aa3936c5b20016ba954268020980a28c7e4415"},"schema_version":"1.0","source":{"id":"2605.08475","kind":"arxiv","version":2}},"canonical_sha256":"13f408008d1254e0888857e7fd3450d54638ecf51340700a56044a7e0cd8216e","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"13f408008d1254e0888857e7fd3450d54638ecf51340700a56044a7e0cd8216e","first_computed_at":"2026-05-20T00:02:12.653063Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-20T00:02:12.653063Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"SoH2Tckx2MpeMiciPDUaraZhzriMPgystFwOwNvtQRFnJAbI2BD8kHfVt98ten3haLPH+7fUzvQ/wHG1+MvQAQ==","signature_status":"signed_v1","signed_at":"2026-05-20T00:02:12.653837Z","signed_message":"canonical_sha256_bytes"},"source_id":"2605.08475","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:86f75d5dca19b58d4ad710acc44b3c6647e9b91f117919e3d0d6e4c68a8321eb","sha256:aa41ef73056b9e2968e8a17f8dbde6aec8072702ab55e8d3449d803b48def814"],"state_sha256":"54f6f21dcd2059ca520e2d02c0a4b0ba7634f46d9d415bb83098d445d9b281b2"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"XUj0TNvM77QIx8ESnfwJAj0NGOTD7pEDeeTctwny+TY8RcTvEtWXqTaf/qTveZKCBqdTzbQ28VsOFMoh+OKICQ==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-28T03:33:34.565232Z","bundle_sha256":"901f613d8200186aed5cb806159be2407b66c8aa497dd504337fea3ee20bc967"}}