{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:YZFSFS5MWNSZLC3RF4CMT2DRXI","short_pith_number":"pith:YZFSFS5M","schema_version":"1.0","canonical_sha256":"c64b22cbacb365958b712f04c9e871ba2e160440b3c78478f440504f8b25dd5f","source":{"kind":"arxiv","id":"2605.16439","version":1},"attestation_state":"computed","paper":{"title":"KVCapsule: Efficient Sequential KV Cache Compression for Vision-Language Models with Asymmetric Redundancy","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.CV","authors_text":"Deming Chen, Steven K. Reinhardt, Tharun Adithya Srikrishnan, Yingbing Huang","submitted_at":"2026-05-14T23:01:58Z","abstract_excerpt":"Vision-Language Models (VLMs) have emerged as a critical and fast-growing extension of Large Language Models (LLMs) that enable multimodal reasoning through both text and image inputs. Although VLMs enrich the capabilities of language models, they also inherit and amplify key computational bottlenecks: the memory overhead caused by the large key-value (KV) cache during autoregressive decoding. This challenge is particularly severe in VLMs, where images produce longer token sequences and denser feature representations compared to text. Moreover, the spatial and information-rich nature of vision"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2605.16439","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2026-05-14T23:01:58Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"4acfe2e73f2c54f2f8306938bb1b8870600388c69618cb9ec32c82b89d948601","abstract_canon_sha256":"3185a410f67f6a5f1a5937b6be4e5f45d2564f44b4ff3bdf0842291446546533"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-20T00:02:22.115441Z","signature_b64":"KWWaLsxYmmj/gx/mM61FzNNA7ZMroBvxFcxGFl/HzQfaw6OdoFFeTSFJajxSOtXHX7EH1CbCIo0XHUKH02taBQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"c64b22cbacb365958b712f04c9e871ba2e160440b3c78478f440504f8b25dd5f","last_reissued_at":"2026-05-20T00:02:22.114780Z","signature_status":"signed_v1","first_computed_at":"2026-05-20T00:02:22.114780Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"KVCapsule: Efficient Sequential KV Cache Compression for Vision-Language Models with Asymmetric Redundancy","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.CV","authors_text":"Deming Chen, Steven K. Reinhardt, Tharun Adithya Srikrishnan, Yingbing Huang","submitted_at":"2026-05-14T23:01:58Z","abstract_excerpt":"Vision-Language Models (VLMs) have emerged as a critical and fast-growing extension of Large Language Models (LLMs) that enable multimodal reasoning through both text and image inputs. Although VLMs enrich the capabilities of language models, they also inherit and amplify key computational bottlenecks: the memory overhead caused by the large key-value (KV) cache during autoregressive decoding. This challenge is particularly severe in VLMs, where images produce longer token sequences and denser feature representations compared to text. Moreover, the spatial and information-rich nature of vision"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.16439","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2605.16439/integrity.json","findings":[],"available":true,"detectors_run":[{"name":"ai_meta_artifact","ran_at":"2026-05-19T19:34:36.576911Z","status":"skipped","version":"1.0.0","findings_count":0},{"name":"claim_evidence","ran_at":"2026-05-19T19:21:57.100098Z","status":"completed","version":"1.0.0","findings_count":0}],"snapshot_sha256":"e577934bc94e2cd02445db6ea5b2088ca794b350e6b2658a8a608cd5132c851a"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2605.16439","created_at":"2026-05-20T00:02:22.114890+00:00"},{"alias_kind":"arxiv_version","alias_value":"2605.16439v1","created_at":"2026-05-20T00:02:22.114890+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.16439","created_at":"2026-05-20T00:02:22.114890+00:00"},{"alias_kind":"pith_short_12","alias_value":"YZFSFS5MWNSZ","created_at":"2026-05-20T00:02:22.114890+00:00"},{"alias_kind":"pith_short_16","alias_value":"YZFSFS5MWNSZLC3R","created_at":"2026-05-20T00:02:22.114890+00:00"},{"alias_kind":"pith_short_8","alias_value":"YZFSFS5M","created_at":"2026-05-20T00:02:22.114890+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/YZFSFS5MWNSZLC3RF4CMT2DRXI","json":"https://pith.science/pith/YZFSFS5MWNSZLC3RF4CMT2DRXI.json","graph_json":"https://pith.science/api/pith-number/YZFSFS5MWNSZLC3RF4CMT2DRXI/graph.json","events_json":"https://pith.science/api/pith-number/YZFSFS5MWNSZLC3RF4CMT2DRXI/events.json","paper":"https://pith.science/paper/YZFSFS5M"},"agent_actions":{"view_html":"https://pith.science/pith/YZFSFS5MWNSZLC3RF4CMT2DRXI","download_json":"https://pith.science/pith/YZFSFS5MWNSZLC3RF4CMT2DRXI.json","view_paper":"https://pith.science/paper/YZFSFS5M","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2605.16439&json=true","fetch_graph":"https://pith.science/api/pith-number/YZFSFS5MWNSZLC3RF4CMT2DRXI/graph.json","fetch_events":"https://pith.science/api/pith-number/YZFSFS5MWNSZLC3RF4CMT2DRXI/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/YZFSFS5MWNSZLC3RF4CMT2DRXI/action/timestamp_anchor","attest_storage":"https://pith.science/pith/YZFSFS5MWNSZLC3RF4CMT2DRXI/action/storage_attestation","attest_author":"https://pith.science/pith/YZFSFS5MWNSZLC3RF4CMT2DRXI/action/author_attestation","sign_citation":"https://pith.science/pith/YZFSFS5MWNSZLC3RF4CMT2DRXI/action/citation_signature","submit_replication":"https://pith.science/pith/YZFSFS5MWNSZLC3RF4CMT2DRXI/action/replication_record"}},"created_at":"2026-05-20T00:02:22.114890+00:00","updated_at":"2026-05-20T00:02:22.114890+00:00"}