{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2024:RCPETT5HFXXTYZCIDGIVXYAWDV","short_pith_number":"pith:RCPETT5H","canonical_record":{"source":{"id":"2407.04620","kind":"arxiv","version":4},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2024-07-05T16:23:20Z","cross_cats_sorted":["cs.AI","cs.CL"],"title_canon_sha256":"28bf260612ef235043aafc2f64009b40780baf577faf3678da216f6c9231734f","abstract_canon_sha256":"05b4d8152342b055af443082b4000e3e33ae32d46334dbf1752401c3572d0c9e"},"schema_version":"1.0"},"canonical_sha256":"889e49cfa72def3c644819915be0161d71812901998c79e2d764dfbfa76e92d6","source":{"kind":"arxiv","id":"2407.04620","version":4},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2407.04620","created_at":"2026-05-17T23:38:53Z"},{"alias_kind":"arxiv_version","alias_value":"2407.04620v4","created_at":"2026-05-17T23:38:53Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2407.04620","created_at":"2026-05-17T23:38:53Z"},{"alias_kind":"pith_short_12","alias_value":"RCPETT5HFXXT","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_16","alias_value":"RCPETT5HFXXTYZCI","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_8","alias_value":"RCPETT5H","created_at":"2026-05-18T12:33:37Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2024:RCPETT5HFXXTYZCIDGIVXYAWDV","target":"record","payload":{"canonical_record":{"source":{"id":"2407.04620","kind":"arxiv","version":4},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2024-07-05T16:23:20Z","cross_cats_sorted":["cs.AI","cs.CL"],"title_canon_sha256":"28bf260612ef235043aafc2f64009b40780baf577faf3678da216f6c9231734f","abstract_canon_sha256":"05b4d8152342b055af443082b4000e3e33ae32d46334dbf1752401c3572d0c9e"},"schema_version":"1.0"},"canonical_sha256":"889e49cfa72def3c644819915be0161d71812901998c79e2d764dfbfa76e92d6","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:38:53.408752Z","signature_b64":"cK95Vz4jMmKFC42dlOfMtHvEm/WTQ9En+dsKpBh0dy/o4nrwA3XE7e6THxtHVGrWIRqcfSaDnAFsbkbYKUvnBA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"889e49cfa72def3c644819915be0161d71812901998c79e2d764dfbfa76e92d6","last_reissued_at":"2026-05-17T23:38:53.408085Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:38:53.408085Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2407.04620","source_version":4,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:38:53Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"hl4+Zxs3+AzRtTGBg7v1wwKJDBCZEqyDNybx7tafc1H9jz4ZGmlHaVhcfPrNhw7Fz2twlBdBBfiZiKks2WsZBg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-30T03:49:55.337074Z"},"content_sha256":"6d56d75f4aa2c393848cb326b2ba9f739d5dce9e81bfb456bb2a20447c981d31","schema_version":"1.0","event_id":"sha256:6d56d75f4aa2c393848cb326b2ba9f739d5dce9e81bfb456bb2a20447c981d31"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2024:RCPETT5HFXXTYZCIDGIVXYAWDV","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Learning to (Learn at Test Time): RNNs with Expressive Hidden States","license":"http://creativecommons.org/licenses/by/4.0/","headline":"RNNs can match long-context performance by updating a learnable hidden-state model via self-supervised steps at test time.","cross_cats":["cs.AI","cs.CL"],"primary_cat":"cs.LG","authors_text":"Arjun Vikram, Carlos Guestrin, Genghan Zhang, Jiarui Xu, Karan Dalal, Sanmi Koyejo, Tatsunori Hashimoto, Xiaolong Wang, Xinhao Li, Xinlei Chen, Yann Dubois, Yu Sun","submitted_at":"2024-07-05T16:23:20Z","abstract_excerpt":"Self-attention performs well in long context but has quadratic complexity. Existing RNN layers have linear complexity, but their performance in long context is limited by the expressive power of their hidden states. We present a practical framework for instantiating sequence modeling layers with linear complexity and expressive hidden states. The key idea is to make the hidden state a machine learning model itself, and the update rule a step of self-supervised learning. Since the hidden state is updated by training even on test sequences, our layers are called Test-Time Training (TTT) layers. "},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"TTT-Linear and TTT-MLP can keep reducing perplexity by conditioning on more tokens, while Mamba cannot after 16k context.","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"That performing gradient-based self-supervised updates on the hidden-state model at test time remains stable, computationally tractable, and beneficial without overfitting or excessive overhead at scale.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"TTT layers treat the hidden state as a trainable model updated at test time, allowing linear-complexity sequence models to scale perplexity reduction with context length unlike Mamba.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"RNNs can match long-context performance by updating a learnable hidden-state model via self-supervised steps at test time.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"9095f13a6182e0b5955486320b67b84c814e7b8aa639ebaf1f48e65d1eb93dcd"},"source":{"id":"2407.04620","kind":"arxiv","version":4},"verdict":{"id":"e57d0e53-4244-4e10-b83a-5f1649bddba6","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-15T05:15:14.448221Z","strongest_claim":"TTT-Linear and TTT-MLP can keep reducing perplexity by conditioning on more tokens, while Mamba cannot after 16k context.","one_line_summary":"TTT layers treat the hidden state as a trainable model updated at test time, allowing linear-complexity sequence models to scale perplexity reduction with context length unlike Mamba.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"That performing gradient-based self-supervised updates on the hidden-state model at test time remains stable, computationally tractable, and beneficial without overfitting or excessive overhead at scale.","pith_extraction_headline":"RNNs can match long-context performance by updating a learnable hidden-state model via self-supervised steps at test time."},"references":{"count":85,"sample":[{"doi":"","year":2023,"title":"GPT-4 Technical Report","work_id":"b928e041-6991-4c08-8c81-0359e4097c7b","ref_index":1,"cited_arxiv_id":"2303.08774","is_internal_anchor":true},{"doi":"","year":2016,"title":"Learning to learn by gradient descent by gradient descent","work_id":"10a07384-39c3-4ed9-9876-b91a06e77edc","ref_index":2,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2023,"title":"You just found out your book was used to train ai","work_id":"177c1a74-b066-4e47-a0dd-3a374c625bdc","ref_index":3,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2024,"title":"o ppel, Markus Spanring, Andreas Auer, Oleksandra Prudnikova, Michael Kopp, G \\","work_id":"cd8ee2fb-957f-4f9d-bdf9-e168fba4c2b8","ref_index":4,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":1990,"title":"Learning a synaptic learning rule","work_id":"a61250e2-c21e-4545-9b59-e49d185d4f4e","ref_index":5,"cited_arxiv_id":"","is_internal_anchor":false}],"resolved_work":85,"snapshot_sha256":"a5e266a013436bb700daa9fde2a862c497152e65109a0e936d186157f3c5aa39","internal_anchors":15},"formal_canon":{"evidence_count":2,"snapshot_sha256":"bd30cfad859502468e2d539bb2bf092191e9ddf5781c7d3128f227b3de8e073d"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":"e57d0e53-4244-4e10-b83a-5f1649bddba6"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:38:53Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"ECyN5uuGMueWuEG5M95tw9sC8b4yvFcLbYJiO73GaXyxSHJJJb1vg1c/Lk+c+jnG9PJOj2G9lAxtEuCg+frRAA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-30T03:49:55.337704Z"},"content_sha256":"24d17940913f84963edd20dc4ba76712910d2b454f02f458270787de7f3ba6f7","schema_version":"1.0","event_id":"sha256:24d17940913f84963edd20dc4ba76712910d2b454f02f458270787de7f3ba6f7"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/RCPETT5HFXXTYZCIDGIVXYAWDV/bundle.json","state_url":"https://pith.science/pith/RCPETT5HFXXTYZCIDGIVXYAWDV/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/RCPETT5HFXXTYZCIDGIVXYAWDV/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-30T03:49:55Z","links":{"resolver":"https://pith.science/pith/RCPETT5HFXXTYZCIDGIVXYAWDV","bundle":"https://pith.science/pith/RCPETT5HFXXTYZCIDGIVXYAWDV/bundle.json","state":"https://pith.science/pith/RCPETT5HFXXTYZCIDGIVXYAWDV/state.json","well_known_bundle":"https://pith.science/.well-known/pith/RCPETT5HFXXTYZCIDGIVXYAWDV/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2024:RCPETT5HFXXTYZCIDGIVXYAWDV","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"05b4d8152342b055af443082b4000e3e33ae32d46334dbf1752401c3572d0c9e","cross_cats_sorted":["cs.AI","cs.CL"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2024-07-05T16:23:20Z","title_canon_sha256":"28bf260612ef235043aafc2f64009b40780baf577faf3678da216f6c9231734f"},"schema_version":"1.0","source":{"id":"2407.04620","kind":"arxiv","version":4}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2407.04620","created_at":"2026-05-17T23:38:53Z"},{"alias_kind":"arxiv_version","alias_value":"2407.04620v4","created_at":"2026-05-17T23:38:53Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2407.04620","created_at":"2026-05-17T23:38:53Z"},{"alias_kind":"pith_short_12","alias_value":"RCPETT5HFXXT","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_16","alias_value":"RCPETT5HFXXTYZCI","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_8","alias_value":"RCPETT5H","created_at":"2026-05-18T12:33:37Z"}],"graph_snapshots":[{"event_id":"sha256:24d17940913f84963edd20dc4ba76712910d2b454f02f458270787de7f3ba6f7","target":"graph","created_at":"2026-05-17T23:38:53Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"TTT-Linear and TTT-MLP can keep reducing perplexity by conditioning on more tokens, while Mamba cannot after 16k context."},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"That performing gradient-based self-supervised updates on the hidden-state model at test time remains stable, computationally tractable, and beneficial without overfitting or excessive overhead at scale."},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"TTT layers treat the hidden state as a trainable model updated at test time, allowing linear-complexity sequence models to scale perplexity reduction with context length unlike Mamba."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"RNNs can match long-context performance by updating a learnable hidden-state model via self-supervised steps at test time."}],"snapshot_sha256":"9095f13a6182e0b5955486320b67b84c814e7b8aa639ebaf1f48e65d1eb93dcd"},"formal_canon":{"evidence_count":2,"snapshot_sha256":"bd30cfad859502468e2d539bb2bf092191e9ddf5781c7d3128f227b3de8e073d"},"paper":{"abstract_excerpt":"Self-attention performs well in long context but has quadratic complexity. Existing RNN layers have linear complexity, but their performance in long context is limited by the expressive power of their hidden states. We present a practical framework for instantiating sequence modeling layers with linear complexity and expressive hidden states. The key idea is to make the hidden state a machine learning model itself, and the update rule a step of self-supervised learning. Since the hidden state is updated by training even on test sequences, our layers are called Test-Time Training (TTT) layers. ","authors_text":"Arjun Vikram, Carlos Guestrin, Genghan Zhang, Jiarui Xu, Karan Dalal, Sanmi Koyejo, Tatsunori Hashimoto, Xiaolong Wang, Xinhao Li, Xinlei Chen, Yann Dubois, Yu Sun","cross_cats":["cs.AI","cs.CL"],"headline":"RNNs can match long-context performance by updating a learnable hidden-state model via self-supervised steps at test time.","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2024-07-05T16:23:20Z","title":"Learning to (Learn at Test Time): RNNs with Expressive Hidden States"},"references":{"count":85,"internal_anchors":15,"resolved_work":85,"sample":[{"cited_arxiv_id":"2303.08774","doi":"","is_internal_anchor":true,"ref_index":1,"title":"GPT-4 Technical Report","work_id":"b928e041-6991-4c08-8c81-0359e4097c7b","year":2023},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":2,"title":"Learning to learn by gradient descent by gradient descent","work_id":"10a07384-39c3-4ed9-9876-b91a06e77edc","year":2016},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":3,"title":"You just found out your book was used to train ai","work_id":"177c1a74-b066-4e47-a0dd-3a374c625bdc","year":2023},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":4,"title":"o ppel, Markus Spanring, Andreas Auer, Oleksandra Prudnikova, Michael Kopp, G \\","work_id":"cd8ee2fb-957f-4f9d-bdf9-e168fba4c2b8","year":2024},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":5,"title":"Learning a synaptic learning rule","work_id":"a61250e2-c21e-4545-9b59-e49d185d4f4e","year":1990}],"snapshot_sha256":"a5e266a013436bb700daa9fde2a862c497152e65109a0e936d186157f3c5aa39"},"source":{"id":"2407.04620","kind":"arxiv","version":4},"verdict":{"created_at":"2026-05-15T05:15:14.448221Z","id":"e57d0e53-4244-4e10-b83a-5f1649bddba6","model_set":{"reader":"grok-4.3"},"one_line_summary":"TTT layers treat the hidden state as a trainable model updated at test time, allowing linear-complexity sequence models to scale perplexity reduction with context length unlike Mamba.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"RNNs can match long-context performance by updating a learnable hidden-state model via self-supervised steps at test time.","strongest_claim":"TTT-Linear and TTT-MLP can keep reducing perplexity by conditioning on more tokens, while Mamba cannot after 16k context.","weakest_assumption":"That performing gradient-based self-supervised updates on the hidden-state model at test time remains stable, computationally tractable, and beneficial without overfitting or excessive overhead at scale."}},"verdict_id":"e57d0e53-4244-4e10-b83a-5f1649bddba6"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:6d56d75f4aa2c393848cb326b2ba9f739d5dce9e81bfb456bb2a20447c981d31","target":"record","created_at":"2026-05-17T23:38:53Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"05b4d8152342b055af443082b4000e3e33ae32d46334dbf1752401c3572d0c9e","cross_cats_sorted":["cs.AI","cs.CL"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2024-07-05T16:23:20Z","title_canon_sha256":"28bf260612ef235043aafc2f64009b40780baf577faf3678da216f6c9231734f"},"schema_version":"1.0","source":{"id":"2407.04620","kind":"arxiv","version":4}},"canonical_sha256":"889e49cfa72def3c644819915be0161d71812901998c79e2d764dfbfa76e92d6","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"889e49cfa72def3c644819915be0161d71812901998c79e2d764dfbfa76e92d6","first_computed_at":"2026-05-17T23:38:53.408085Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:38:53.408085Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"cK95Vz4jMmKFC42dlOfMtHvEm/WTQ9En+dsKpBh0dy/o4nrwA3XE7e6THxtHVGrWIRqcfSaDnAFsbkbYKUvnBA==","signature_status":"signed_v1","signed_at":"2026-05-17T23:38:53.408752Z","signed_message":"canonical_sha256_bytes"},"source_id":"2407.04620","source_kind":"arxiv","source_version":4}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:6d56d75f4aa2c393848cb326b2ba9f739d5dce9e81bfb456bb2a20447c981d31","sha256:24d17940913f84963edd20dc4ba76712910d2b454f02f458270787de7f3ba6f7"],"state_sha256":"15223be3a52b8a08b1ca6eec6a07d2eb13de7071b870ac5db3dd7e46941675d0"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"2obM28rFq9exfWbZv0fi8QXkWS0nk4C9cumnQHvqaKJcsmzXDvqE/c2rbA+ezJcg2E6Niu1DdOI489ASyA5aAg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-30T03:49:55.340423Z","bundle_sha256":"1cebd2c42dc57db1e724fc052f551f7a5752f08f8582a1b63a7f29df7d136064"}}