{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2025:3H7DRKZ6UFC4CAAPGQNGMGI3TA","short_pith_number":"pith:3H7DRKZ6","schema_version":"1.0","canonical_sha256":"d9fe38ab3ea145c1000f341a66191b9813548f02cc6c4c900a6011874559d02c","source":{"kind":"arxiv","id":"2511.21016","version":3},"attestation_state":"computed","paper":{"title":"Gated KalmaNet: A Fading Memory Layer Through Test-Time Ridge Regression","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.CL"],"primary_cat":"cs.LG","authors_text":"Aditya Chattopadhyay, Elvis Nunez, Liangzu Peng, Luca Zancato, Stefano Soatto, Wei Xia","submitted_at":"2025-11-26T03:26:37Z","abstract_excerpt":"Linear State-Space Models (SSMs) offer an efficient alternative to softmax Attention with constant memory and linear compute, but their lossy, fading summary of the past hurts recall-oriented tasks. We propose Gated KalmaNet (GKA, pronounced \"gee-ka\"), a layer that accounts for the full past while retaining SSM-style efficiency. We ground our approach in the Kalman Filter (KF), and show that several existing SSM layers (DeltaNet, Gated DeltaNet, Kimi Delta Attention) are approximations to the KF recurrence under an identity error covariance assumption, which ignores how past keys and values sh"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2511.21016","kind":"arxiv","version":3},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2025-11-26T03:26:37Z","cross_cats_sorted":["cs.CL"],"title_canon_sha256":"cad8de4bd8a4ac446d99123070f7bae43a0237c6b718c55fae429baf39390536","abstract_canon_sha256":"7e9736ae27d4b1dd61f641cc4ac86363565e8c9e0f3ecfb1734177fe496b65da"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-20T00:04:17.855278Z","signature_b64":"YaeSUTdhR8yPaUFyUigXuq+Z3/ia2irwVZEk6W6nveiCObrZF1MW/jFJ63ID2L42iGqihYqPiin5xd0NLEjoBg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"d9fe38ab3ea145c1000f341a66191b9813548f02cc6c4c900a6011874559d02c","last_reissued_at":"2026-05-20T00:04:17.854476Z","signature_status":"signed_v1","first_computed_at":"2026-05-20T00:04:17.854476Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Gated KalmaNet: A Fading Memory Layer Through Test-Time Ridge Regression","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.CL"],"primary_cat":"cs.LG","authors_text":"Aditya Chattopadhyay, Elvis Nunez, Liangzu Peng, Luca Zancato, Stefano Soatto, Wei Xia","submitted_at":"2025-11-26T03:26:37Z","abstract_excerpt":"Linear State-Space Models (SSMs) offer an efficient alternative to softmax Attention with constant memory and linear compute, but their lossy, fading summary of the past hurts recall-oriented tasks. We propose Gated KalmaNet (GKA, pronounced \"gee-ka\"), a layer that accounts for the full past while retaining SSM-style efficiency. We ground our approach in the Kalman Filter (KF), and show that several existing SSM layers (DeltaNet, Gated DeltaNet, Kimi Delta Attention) are approximations to the KF recurrence under an identity error covariance assumption, which ignores how past keys and values sh"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2511.21016","kind":"arxiv","version":3},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2511.21016/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2511.21016","created_at":"2026-05-20T00:04:17.854586+00:00"},{"alias_kind":"arxiv_version","alias_value":"2511.21016v3","created_at":"2026-05-20T00:04:17.854586+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2511.21016","created_at":"2026-05-20T00:04:17.854586+00:00"},{"alias_kind":"pith_short_12","alias_value":"3H7DRKZ6UFC4","created_at":"2026-05-20T00:04:17.854586+00:00"},{"alias_kind":"pith_short_16","alias_value":"3H7DRKZ6UFC4CAAP","created_at":"2026-05-20T00:04:17.854586+00:00"},{"alias_kind":"pith_short_8","alias_value":"3H7DRKZ6","created_at":"2026-05-20T00:04:17.854586+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":2,"internal_anchor_count":2,"sample":[{"citing_arxiv_id":"2605.08301","citing_title":"Priming: Hybrid State Space Models From Pre-trained Transformers","ref_index":69,"is_internal_anchor":true},{"citing_arxiv_id":"2604.21100","citing_title":"Preconditioned DeltaNet: Curvature-aware Sequence Modeling for Linear Recurrences","ref_index":40,"is_internal_anchor":true}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/3H7DRKZ6UFC4CAAPGQNGMGI3TA","json":"https://pith.science/pith/3H7DRKZ6UFC4CAAPGQNGMGI3TA.json","graph_json":"https://pith.science/api/pith-number/3H7DRKZ6UFC4CAAPGQNGMGI3TA/graph.json","events_json":"https://pith.science/api/pith-number/3H7DRKZ6UFC4CAAPGQNGMGI3TA/events.json","paper":"https://pith.science/paper/3H7DRKZ6"},"agent_actions":{"view_html":"https://pith.science/pith/3H7DRKZ6UFC4CAAPGQNGMGI3TA","download_json":"https://pith.science/pith/3H7DRKZ6UFC4CAAPGQNGMGI3TA.json","view_paper":"https://pith.science/paper/3H7DRKZ6","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2511.21016&json=true","fetch_graph":"https://pith.science/api/pith-number/3H7DRKZ6UFC4CAAPGQNGMGI3TA/graph.json","fetch_events":"https://pith.science/api/pith-number/3H7DRKZ6UFC4CAAPGQNGMGI3TA/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/3H7DRKZ6UFC4CAAPGQNGMGI3TA/action/timestamp_anchor","attest_storage":"https://pith.science/pith/3H7DRKZ6UFC4CAAPGQNGMGI3TA/action/storage_attestation","attest_author":"https://pith.science/pith/3H7DRKZ6UFC4CAAPGQNGMGI3TA/action/author_attestation","sign_citation":"https://pith.science/pith/3H7DRKZ6UFC4CAAPGQNGMGI3TA/action/citation_signature","submit_replication":"https://pith.science/pith/3H7DRKZ6UFC4CAAPGQNGMGI3TA/action/replication_record"}},"created_at":"2026-05-20T00:04:17.854586+00:00","updated_at":"2026-05-20T00:04:17.854586+00:00"}