{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:QCFYB4O7Y7II475G5OEATJJ7XA","short_pith_number":"pith:QCFYB4O7","schema_version":"1.0","canonical_sha256":"808b80f1dfc7d08e7fa6eb8809a53fb8360f6276e5cf519f68c0dc1c743e3074","source":{"kind":"arxiv","id":"2605.22416","version":1},"attestation_state":"computed","paper":{"title":"Asymmetric Virtual Memory Paging for Hybrid Mamba-Transformer Inference","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.DC","cs.PF"],"primary_cat":"cs.LG","authors_text":"An Xuan Nguyen","submitted_at":"2026-05-21T12:37:34Z","abstract_excerpt":"Hybrid language models like Jamba mix attention layers with State Space Models (SSMs), creating two memory cache types with opposite profiles: Key-Value (KV) caches grow linearly with sequence length, while SSM states stay fixed per layer. Current inference engines handle this poorly. Unified pools pad SSM states to attention page sizes, wasting up to 7.3x capacity. Static dual pools cannot adapt when prompt distributions shift between requests. We present Asymmetric Virtual Memory Paging (AVMP). The allocator separates the two cache types into physically distinct pools behind a unified virtua"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2605.22416","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2026-05-21T12:37:34Z","cross_cats_sorted":["cs.DC","cs.PF"],"title_canon_sha256":"0d63c805aba39517eb19a7ab9e3481e0cc3941336a1bf50434951ce1e1097f69","abstract_canon_sha256":"316dd110632a099fa591c243d5e153e9795eeb2d2513a53bc416b070e60c3fe0"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-22T01:04:42.297366Z","signature_b64":"/zE7w9cstldxgZNrz1iXh1Cza4BVPvLNmSpsFLCGIB9PD+JII9VhZL5oM4oAJcti1QFfNpbg89le1G5Ew18wBQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"808b80f1dfc7d08e7fa6eb8809a53fb8360f6276e5cf519f68c0dc1c743e3074","last_reissued_at":"2026-05-22T01:04:42.296738Z","signature_status":"signed_v1","first_computed_at":"2026-05-22T01:04:42.296738Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Asymmetric Virtual Memory Paging for Hybrid Mamba-Transformer Inference","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.DC","cs.PF"],"primary_cat":"cs.LG","authors_text":"An Xuan Nguyen","submitted_at":"2026-05-21T12:37:34Z","abstract_excerpt":"Hybrid language models like Jamba mix attention layers with State Space Models (SSMs), creating two memory cache types with opposite profiles: Key-Value (KV) caches grow linearly with sequence length, while SSM states stay fixed per layer. Current inference engines handle this poorly. Unified pools pad SSM states to attention page sizes, wasting up to 7.3x capacity. Static dual pools cannot adapt when prompt distributions shift between requests. We present Asymmetric Virtual Memory Paging (AVMP). The allocator separates the two cache types into physically distinct pools behind a unified virtua"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.22416","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2605.22416/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2605.22416","created_at":"2026-05-22T01:04:42.296849+00:00"},{"alias_kind":"arxiv_version","alias_value":"2605.22416v1","created_at":"2026-05-22T01:04:42.296849+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.22416","created_at":"2026-05-22T01:04:42.296849+00:00"},{"alias_kind":"pith_short_12","alias_value":"QCFYB4O7Y7II","created_at":"2026-05-22T01:04:42.296849+00:00"},{"alias_kind":"pith_short_16","alias_value":"QCFYB4O7Y7II475G","created_at":"2026-05-22T01:04:42.296849+00:00"},{"alias_kind":"pith_short_8","alias_value":"QCFYB4O7","created_at":"2026-05-22T01:04:42.296849+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/QCFYB4O7Y7II475G5OEATJJ7XA","json":"https://pith.science/pith/QCFYB4O7Y7II475G5OEATJJ7XA.json","graph_json":"https://pith.science/api/pith-number/QCFYB4O7Y7II475G5OEATJJ7XA/graph.json","events_json":"https://pith.science/api/pith-number/QCFYB4O7Y7II475G5OEATJJ7XA/events.json","paper":"https://pith.science/paper/QCFYB4O7"},"agent_actions":{"view_html":"https://pith.science/pith/QCFYB4O7Y7II475G5OEATJJ7XA","download_json":"https://pith.science/pith/QCFYB4O7Y7II475G5OEATJJ7XA.json","view_paper":"https://pith.science/paper/QCFYB4O7","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2605.22416&json=true","fetch_graph":"https://pith.science/api/pith-number/QCFYB4O7Y7II475G5OEATJJ7XA/graph.json","fetch_events":"https://pith.science/api/pith-number/QCFYB4O7Y7II475G5OEATJJ7XA/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/QCFYB4O7Y7II475G5OEATJJ7XA/action/timestamp_anchor","attest_storage":"https://pith.science/pith/QCFYB4O7Y7II475G5OEATJJ7XA/action/storage_attestation","attest_author":"https://pith.science/pith/QCFYB4O7Y7II475G5OEATJJ7XA/action/author_attestation","sign_citation":"https://pith.science/pith/QCFYB4O7Y7II475G5OEATJJ7XA/action/citation_signature","submit_replication":"https://pith.science/pith/QCFYB4O7Y7II475G5OEATJJ7XA/action/replication_record"}},"created_at":"2026-05-22T01:04:42.296849+00:00","updated_at":"2026-05-22T01:04:42.296849+00:00"}