{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:YB3NHDCIAU4LOID55LM4DUZCQ4","short_pith_number":"pith:YB3NHDCI","schema_version":"1.0","canonical_sha256":"c076d38c480538b7207dead9c1d322872d8718073469adce944c2c128e623925","source":{"kind":"arxiv","id":"2602.13215","version":2},"attestation_state":"computed","paper":{"title":"When to Think Fast and Slow? AMOR: Adaptive Entropy Gate for Hybrid Models","license":"http://creativecommons.org/licenses/by/4.0/","headline":"AMOR routes attention to high-entropy tokens in recurrent models, matching full hybrids while using attention on only 22 percent of positions.","cross_cats":[],"primary_cat":"cs.AI","authors_text":"Chen Shani, Haoran Zheng","submitted_at":"2026-01-22T17:19:58Z","abstract_excerpt":"Recurrent-attention hybrids aim to combine the efficiency of recurrence with the expressivity of attention, but existing approaches typically apply attention uniformly across all positions, even when the recurrent state alone is sufficient for accurate prediction. We introduce AMOR (Adaptive Metacognitive Output Router), a post-hoc hybrid architecture that selectively invokes attention based on predictive uncertainty. A recurrent backbone is augmented with entropy-gated attention blocks that activate only when the model's output entropy exceeds a dynamic threshold derived from a running batch "},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":true,"formal_links_present":true},"canonical_record":{"source":{"id":"2602.13215","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.AI","submitted_at":"2026-01-22T17:19:58Z","cross_cats_sorted":[],"title_canon_sha256":"e0f05bdcd9b0fe30d0eceb440cd666cdfc4f4d5bdb61b11b069921a5723ec0c0","abstract_canon_sha256":"0676e5e3bb78eef29abe6f00ccc137b3f922cecc1e1654925b39f7e25c252d9e"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T03:09:23.559158Z","signature_b64":"05q4MwoIvNmA0iokq3rwhfilo3twbkmiLTTtzfRh3A6b4aYJRxp5jsAm+fJLr9n8uk/0joH+uL1pqdewMvQ0Bw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"c076d38c480538b7207dead9c1d322872d8718073469adce944c2c128e623925","last_reissued_at":"2026-05-18T03:09:23.557862Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T03:09:23.557862Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"When to Think Fast and Slow? AMOR: Adaptive Entropy Gate for Hybrid Models","license":"http://creativecommons.org/licenses/by/4.0/","headline":"AMOR routes attention to high-entropy tokens in recurrent models, matching full hybrids while using attention on only 22 percent of positions.","cross_cats":[],"primary_cat":"cs.AI","authors_text":"Chen Shani, Haoran Zheng","submitted_at":"2026-01-22T17:19:58Z","abstract_excerpt":"Recurrent-attention hybrids aim to combine the efficiency of recurrence with the expressivity of attention, but existing approaches typically apply attention uniformly across all positions, even when the recurrent state alone is sufficient for accurate prediction. We introduce AMOR (Adaptive Metacognitive Output Router), a post-hoc hybrid architecture that selectively invokes attention based on predictive uncertainty. A recurrent backbone is augmented with entropy-gated attention blocks that activate only when the model's output entropy exceeds a dynamic threshold derived from a running batch "},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"Across Mamba2 and Gated DeltaNet backbones (180M-1.5B), AMOR consistently matches or outperforms both pure recurrent models and fixed-schedule hybrid baselines while invoking attention on only ~22% of tokens.","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"That a dynamic threshold derived from running batch median and scaled standard deviation of output entropy reliably identifies positions where recurrent state alone is insufficient, without introducing distribution-shift artifacts or requiring per-task retuning.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"AMOR uses output entropy to gate attention in recurrent hybrids, matching full attention performance at roughly 22% attention invocations across 180M-1.5B models.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"AMOR routes attention to high-entropy tokens in recurrent models, matching full hybrids while using attention on only 22 percent of positions.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"b16bd67777ddb00750298cd44c77c1b688532286daf22ba2945364431897b845"},"source":{"id":"2602.13215","kind":"arxiv","version":2},"verdict":{"id":"5682f14f-d821-4dcd-922c-578e60ab17a2","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-16T11:46:11.876477Z","strongest_claim":"Across Mamba2 and Gated DeltaNet backbones (180M-1.5B), AMOR consistently matches or outperforms both pure recurrent models and fixed-schedule hybrid baselines while invoking attention on only ~22% of tokens.","one_line_summary":"AMOR uses output entropy to gate attention in recurrent hybrids, matching full attention performance at roughly 22% attention invocations across 180M-1.5B models.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"That a dynamic threshold derived from running batch median and scaled standard deviation of output entropy reliably identifies positions where recurrent state alone is insufficient, without introducing distribution-shift artifacts or requiring per-task retuning.","pith_extraction_headline":"AMOR routes attention to high-entropy tokens in recurrent models, matching full hybrids while using attention on only 22 percent of positions."},"references":{"count":17,"sample":[{"doi":"","year":null,"title":"Pondernet: Learning to ponder","work_id":"dbab54b9-3aa6-4f4d-8117-aa3c5ff6a0fd","ref_index":1,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2004,"title":"Longformer: The Long-Document Transformer","work_id":"abea7a44-6668-4de7-aab6-f53a6e5aa088","ref_index":2,"cited_arxiv_id":"2004.05150","is_internal_anchor":true},{"doi":"","year":null,"title":"The Consciousness Prior, Dec","work_id":"3d734855-ef5a-4524-afd9-bedb436a7293","ref_index":3,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":null,"title":"Estimating or Propagating Gradients Through Stochastic Neurons for Conditional Computation","work_id":"1fe8c7c8-aff7-4b94-9096-e549d7e60789","ref_index":4,"cited_arxiv_id":"1308.3432","is_internal_anchor":true},{"doi":"","year":null,"title":"Transformers are SSMs: Generalized Models and Efficient Algorithms Through Structured State Space Duality","work_id":"d8eba076-0449-4f6a-aae1-5a7260677f0f","ref_index":5,"cited_arxiv_id":"2405.21060","is_internal_anchor":true}],"resolved_work":17,"snapshot_sha256":"6400151de4a912613ae3cf01097bbfc2cf49f2bb39dc98ed4080834afc1ea965","internal_anchors":11},"formal_canon":{"evidence_count":2,"snapshot_sha256":"043ed7aadbd5aab771d5e553177b687332256e6a9d9bd7770362a98df362e82a"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2602.13215","created_at":"2026-05-18T03:09:23.558444+00:00"},{"alias_kind":"arxiv_version","alias_value":"2602.13215v2","created_at":"2026-05-18T03:09:23.558444+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2602.13215","created_at":"2026-05-18T03:09:23.558444+00:00"},{"alias_kind":"pith_short_12","alias_value":"YB3NHDCIAU4L","created_at":"2026-05-18T12:33:37.589309+00:00"},{"alias_kind":"pith_short_16","alias_value":"YB3NHDCIAU4LOID5","created_at":"2026-05-18T12:33:37.589309+00:00"},{"alias_kind":"pith_short_8","alias_value":"YB3NHDCI","created_at":"2026-05-18T12:33:37.589309+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":2,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/YB3NHDCIAU4LOID55LM4DUZCQ4","json":"https://pith.science/pith/YB3NHDCIAU4LOID55LM4DUZCQ4.json","graph_json":"https://pith.science/api/pith-number/YB3NHDCIAU4LOID55LM4DUZCQ4/graph.json","events_json":"https://pith.science/api/pith-number/YB3NHDCIAU4LOID55LM4DUZCQ4/events.json","paper":"https://pith.science/paper/YB3NHDCI"},"agent_actions":{"view_html":"https://pith.science/pith/YB3NHDCIAU4LOID55LM4DUZCQ4","download_json":"https://pith.science/pith/YB3NHDCIAU4LOID55LM4DUZCQ4.json","view_paper":"https://pith.science/paper/YB3NHDCI","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2602.13215&json=true","fetch_graph":"https://pith.science/api/pith-number/YB3NHDCIAU4LOID55LM4DUZCQ4/graph.json","fetch_events":"https://pith.science/api/pith-number/YB3NHDCIAU4LOID55LM4DUZCQ4/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/YB3NHDCIAU4LOID55LM4DUZCQ4/action/timestamp_anchor","attest_storage":"https://pith.science/pith/YB3NHDCIAU4LOID55LM4DUZCQ4/action/storage_attestation","attest_author":"https://pith.science/pith/YB3NHDCIAU4LOID55LM4DUZCQ4/action/author_attestation","sign_citation":"https://pith.science/pith/YB3NHDCIAU4LOID55LM4DUZCQ4/action/citation_signature","submit_replication":"https://pith.science/pith/YB3NHDCIAU4LOID55LM4DUZCQ4/action/replication_record"}},"created_at":"2026-05-18T03:09:23.558444+00:00","updated_at":"2026-05-18T03:09:23.558444+00:00"}