{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2024:6S2KKZLV64O6J4OIQJLMDQIB4U","short_pith_number":"pith:6S2KKZLV","canonical_record":{"source":{"id":"2407.11550","kind":"arxiv","version":5},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2024-07-16T09:53:32Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"467a4db061812a003b56fa4551f1967a4777d9f127f2d8e30f7c49e751d72a89","abstract_canon_sha256":"d97adabc45e8212c7ade479ef329942d4caf3844171883c7112194e1e9466758"},"schema_version":"1.0"},"canonical_sha256":"f4b4a56575f71de4f1c88256c1c101e52500e026ad0ac91a66bfe3487091362d","source":{"kind":"arxiv","id":"2407.11550","version":5},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2407.11550","created_at":"2026-05-17T23:38:14Z"},{"alias_kind":"arxiv_version","alias_value":"2407.11550v5","created_at":"2026-05-17T23:38:14Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2407.11550","created_at":"2026-05-17T23:38:14Z"},{"alias_kind":"pith_short_12","alias_value":"6S2KKZLV64O6","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_16","alias_value":"6S2KKZLV64O6J4OI","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_8","alias_value":"6S2KKZLV","created_at":"2026-05-18T12:33:37Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2024:6S2KKZLV64O6J4OIQJLMDQIB4U","target":"record","payload":{"canonical_record":{"source":{"id":"2407.11550","kind":"arxiv","version":5},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2024-07-16T09:53:32Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"467a4db061812a003b56fa4551f1967a4777d9f127f2d8e30f7c49e751d72a89","abstract_canon_sha256":"d97adabc45e8212c7ade479ef329942d4caf3844171883c7112194e1e9466758"},"schema_version":"1.0"},"canonical_sha256":"f4b4a56575f71de4f1c88256c1c101e52500e026ad0ac91a66bfe3487091362d","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:38:14.260849Z","signature_b64":"cno2Y824AXEEKUCvoDa5BNbsieAMCARq4k8Ci2b891cLUylpRCsdt8BAwqom1aMCQBWtn2K+c3ZMBnBWVAHqDQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"f4b4a56575f71de4f1c88256c1c101e52500e026ad0ac91a66bfe3487091362d","last_reissued_at":"2026-05-17T23:38:14.260301Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:38:14.260301Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2407.11550","source_version":5,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:38:14Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"Jf3mR48UBhDvremFmyBIMeBWh90iWrpoD9bUqTB3SjQtq+q5l7RFKLFop/puY2CzQOuH849/GgI9oDZCFvoRCg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-31T06:59:44.184801Z"},"content_sha256":"962c87707354de11959b8bfb2809898b637c1ff3332e5ffad7d1ce45fa634caa","schema_version":"1.0","event_id":"sha256:962c87707354de11959b8bfb2809898b637c1ff3332e5ffad7d1ce45fa634caa"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2024:6S2KKZLV64O6J4OIQJLMDQIB4U","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Ada-KV: Optimizing KV Cache Eviction by Adaptive Budget Allocation for Efficient LLM Inference","license":"http://creativecommons.org/licenses/by/4.0/","headline":"A theoretical upper bound on attention loss from KV cache eviction enables adaptive per-head budget allocation.","cross_cats":["cs.AI"],"primary_cat":"cs.CL","authors_text":"Junlin Lv, S. Kevin Zhou, Xike Xie, Yuan Feng, Yukun Cao","submitted_at":"2024-07-16T09:53:32Z","abstract_excerpt":"Large Language Models have excelled in various domains but face efficiency challenges due to the growing Key-Value (KV) cache required for long-sequence inference. Recent efforts aim to reduce KV cache size by evicting vast non-critical cache elements during runtime while preserving generation quality. However, these methods typically allocate compression budgets uniformly across all attention heads, ignoring the unique attention patterns of each head. In this paper, we establish a theoretical loss upper bound between pre- and post-eviction attention output, explaining the optimization target "},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"we establish a theoretical loss upper bound between pre- and post-eviction attention output, explaining the optimization target of prior cache eviction methods, while guiding the optimization of adaptive budget allocation. Base on this, we propose Ada-KV, the first head-wise adaptive budget allocation strategy.","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"The derived loss upper bound accurately captures the quality impact of eviction and that attention heads exhibit sufficiently distinct patterns to benefit from non-uniform budget allocation without introducing new approximation errors that undermine the bound.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"Ada-KV is the first head-wise adaptive KV cache budget allocator for LLMs, using a theoretical loss upper bound to allocate eviction differently per attention head and yielding higher quality than uniform methods on long-context benchmarks.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"A theoretical upper bound on attention loss from KV cache eviction enables adaptive per-head budget allocation.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"f1f2eaa8327444c68ab696c41443b266a5201651194098774638041c361fd012"},"source":{"id":"2407.11550","kind":"arxiv","version":5},"verdict":{"id":"b566957a-dfdc-4128-b9ad-7615a80557c3","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-17T11:12:08.412231Z","strongest_claim":"we establish a theoretical loss upper bound between pre- and post-eviction attention output, explaining the optimization target of prior cache eviction methods, while guiding the optimization of adaptive budget allocation. Base on this, we propose Ada-KV, the first head-wise adaptive budget allocation strategy.","one_line_summary":"Ada-KV is the first head-wise adaptive KV cache budget allocator for LLMs, using a theoretical loss upper bound to allocate eviction differently per attention head and yielding higher quality than uniform methods on long-context benchmarks.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"The derived loss upper bound accurately captures the quality impact of eviction and that attention heads exhibit sufficiently distinct patterns to benefit from non-uniform budget allocation without introducing new approximation errors that undermine the bound.","pith_extraction_headline":"A theoretical upper bound on attention loss from KV cache eviction enables adaptive per-head budget allocation."},"references":{"count":69,"sample":[{"doi":"","year":2024,"title":"A survey on recent advances in llm-based multi-turn dialogue systems","work_id":"d4419b98-6427-493f-8afe-3e4d0d22a371","ref_index":1,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2023,"title":"Summedits: measuring llm ability at factual reasoning through the lens of summarization","work_id":"00288d86-bd7b-4705-86cc-8d173f1a99e3","ref_index":2,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2023,"title":"Llm-based code generation method for golang compiler testing","work_id":"2d74f7bf-f26e-4dd9-8eaf-f3343afb4df8","ref_index":3,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2023,"title":"GPT-4 Technical Report","work_id":"b928e041-6991-4c08-8c81-0359e4097c7b","ref_index":4,"cited_arxiv_id":"2303.08774","is_internal_anchor":true},{"doi":"","year":2024,"title":"The claude 3 model family: Opus, sonnet, haiku, March 2024","work_id":"33e09cc4-5444-41b5-bbd0-30293fce35f4","ref_index":5,"cited_arxiv_id":"","is_internal_anchor":false}],"resolved_work":69,"snapshot_sha256":"63764eb95801d93ae25c386d20df37b374c5f98fef846aaaead983c882feac69","internal_anchors":16},"formal_canon":{"evidence_count":2,"snapshot_sha256":"d052083b511e4137887db1181208650060845997478fb9859ca756e480047d20"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":"b566957a-dfdc-4128-b9ad-7615a80557c3"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:38:14Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"jc9GP07JF+6qdEVY+xrWtSfPD99RpUppccj9HiCXyhg/77qVpTN4lk44kBUdgxACgsnTTmVKCz5QtZpzak7cDg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-31T06:59:44.185987Z"},"content_sha256":"95e9d6861353d9fb3e13d5c0a4093a97ac620c874f9104d36c3f477842077cab","schema_version":"1.0","event_id":"sha256:95e9d6861353d9fb3e13d5c0a4093a97ac620c874f9104d36c3f477842077cab"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/6S2KKZLV64O6J4OIQJLMDQIB4U/bundle.json","state_url":"https://pith.science/pith/6S2KKZLV64O6J4OIQJLMDQIB4U/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/6S2KKZLV64O6J4OIQJLMDQIB4U/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-31T06:59:44Z","links":{"resolver":"https://pith.science/pith/6S2KKZLV64O6J4OIQJLMDQIB4U","bundle":"https://pith.science/pith/6S2KKZLV64O6J4OIQJLMDQIB4U/bundle.json","state":"https://pith.science/pith/6S2KKZLV64O6J4OIQJLMDQIB4U/state.json","well_known_bundle":"https://pith.science/.well-known/pith/6S2KKZLV64O6J4OIQJLMDQIB4U/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2024:6S2KKZLV64O6J4OIQJLMDQIB4U","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"d97adabc45e8212c7ade479ef329942d4caf3844171883c7112194e1e9466758","cross_cats_sorted":["cs.AI"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2024-07-16T09:53:32Z","title_canon_sha256":"467a4db061812a003b56fa4551f1967a4777d9f127f2d8e30f7c49e751d72a89"},"schema_version":"1.0","source":{"id":"2407.11550","kind":"arxiv","version":5}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2407.11550","created_at":"2026-05-17T23:38:14Z"},{"alias_kind":"arxiv_version","alias_value":"2407.11550v5","created_at":"2026-05-17T23:38:14Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2407.11550","created_at":"2026-05-17T23:38:14Z"},{"alias_kind":"pith_short_12","alias_value":"6S2KKZLV64O6","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_16","alias_value":"6S2KKZLV64O6J4OI","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_8","alias_value":"6S2KKZLV","created_at":"2026-05-18T12:33:37Z"}],"graph_snapshots":[{"event_id":"sha256:95e9d6861353d9fb3e13d5c0a4093a97ac620c874f9104d36c3f477842077cab","target":"graph","created_at":"2026-05-17T23:38:14Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"we establish a theoretical loss upper bound between pre- and post-eviction attention output, explaining the optimization target of prior cache eviction methods, while guiding the optimization of adaptive budget allocation. Base on this, we propose Ada-KV, the first head-wise adaptive budget allocation strategy."},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"The derived loss upper bound accurately captures the quality impact of eviction and that attention heads exhibit sufficiently distinct patterns to benefit from non-uniform budget allocation without introducing new approximation errors that undermine the bound."},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"Ada-KV is the first head-wise adaptive KV cache budget allocator for LLMs, using a theoretical loss upper bound to allocate eviction differently per attention head and yielding higher quality than uniform methods on long-context benchmarks."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"A theoretical upper bound on attention loss from KV cache eviction enables adaptive per-head budget allocation."}],"snapshot_sha256":"f1f2eaa8327444c68ab696c41443b266a5201651194098774638041c361fd012"},"formal_canon":{"evidence_count":2,"snapshot_sha256":"d052083b511e4137887db1181208650060845997478fb9859ca756e480047d20"},"paper":{"abstract_excerpt":"Large Language Models have excelled in various domains but face efficiency challenges due to the growing Key-Value (KV) cache required for long-sequence inference. Recent efforts aim to reduce KV cache size by evicting vast non-critical cache elements during runtime while preserving generation quality. However, these methods typically allocate compression budgets uniformly across all attention heads, ignoring the unique attention patterns of each head. In this paper, we establish a theoretical loss upper bound between pre- and post-eviction attention output, explaining the optimization target ","authors_text":"Junlin Lv, S. Kevin Zhou, Xike Xie, Yuan Feng, Yukun Cao","cross_cats":["cs.AI"],"headline":"A theoretical upper bound on attention loss from KV cache eviction enables adaptive per-head budget allocation.","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2024-07-16T09:53:32Z","title":"Ada-KV: Optimizing KV Cache Eviction by Adaptive Budget Allocation for Efficient LLM Inference"},"references":{"count":69,"internal_anchors":16,"resolved_work":69,"sample":[{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":1,"title":"A survey on recent advances in llm-based multi-turn dialogue systems","work_id":"d4419b98-6427-493f-8afe-3e4d0d22a371","year":2024},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":2,"title":"Summedits: measuring llm ability at factual reasoning through the lens of summarization","work_id":"00288d86-bd7b-4705-86cc-8d173f1a99e3","year":2023},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":3,"title":"Llm-based code generation method for golang compiler testing","work_id":"2d74f7bf-f26e-4dd9-8eaf-f3343afb4df8","year":2023},{"cited_arxiv_id":"2303.08774","doi":"","is_internal_anchor":true,"ref_index":4,"title":"GPT-4 Technical Report","work_id":"b928e041-6991-4c08-8c81-0359e4097c7b","year":2023},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":5,"title":"The claude 3 model family: Opus, sonnet, haiku, March 2024","work_id":"33e09cc4-5444-41b5-bbd0-30293fce35f4","year":2024}],"snapshot_sha256":"63764eb95801d93ae25c386d20df37b374c5f98fef846aaaead983c882feac69"},"source":{"id":"2407.11550","kind":"arxiv","version":5},"verdict":{"created_at":"2026-05-17T11:12:08.412231Z","id":"b566957a-dfdc-4128-b9ad-7615a80557c3","model_set":{"reader":"grok-4.3"},"one_line_summary":"Ada-KV is the first head-wise adaptive KV cache budget allocator for LLMs, using a theoretical loss upper bound to allocate eviction differently per attention head and yielding higher quality than uniform methods on long-context benchmarks.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"A theoretical upper bound on attention loss from KV cache eviction enables adaptive per-head budget allocation.","strongest_claim":"we establish a theoretical loss upper bound between pre- and post-eviction attention output, explaining the optimization target of prior cache eviction methods, while guiding the optimization of adaptive budget allocation. Base on this, we propose Ada-KV, the first head-wise adaptive budget allocation strategy.","weakest_assumption":"The derived loss upper bound accurately captures the quality impact of eviction and that attention heads exhibit sufficiently distinct patterns to benefit from non-uniform budget allocation without introducing new approximation errors that undermine the bound."}},"verdict_id":"b566957a-dfdc-4128-b9ad-7615a80557c3"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:962c87707354de11959b8bfb2809898b637c1ff3332e5ffad7d1ce45fa634caa","target":"record","created_at":"2026-05-17T23:38:14Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"d97adabc45e8212c7ade479ef329942d4caf3844171883c7112194e1e9466758","cross_cats_sorted":["cs.AI"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2024-07-16T09:53:32Z","title_canon_sha256":"467a4db061812a003b56fa4551f1967a4777d9f127f2d8e30f7c49e751d72a89"},"schema_version":"1.0","source":{"id":"2407.11550","kind":"arxiv","version":5}},"canonical_sha256":"f4b4a56575f71de4f1c88256c1c101e52500e026ad0ac91a66bfe3487091362d","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"f4b4a56575f71de4f1c88256c1c101e52500e026ad0ac91a66bfe3487091362d","first_computed_at":"2026-05-17T23:38:14.260301Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:38:14.260301Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"cno2Y824AXEEKUCvoDa5BNbsieAMCARq4k8Ci2b891cLUylpRCsdt8BAwqom1aMCQBWtn2K+c3ZMBnBWVAHqDQ==","signature_status":"signed_v1","signed_at":"2026-05-17T23:38:14.260849Z","signed_message":"canonical_sha256_bytes"},"source_id":"2407.11550","source_kind":"arxiv","source_version":5}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:962c87707354de11959b8bfb2809898b637c1ff3332e5ffad7d1ce45fa634caa","sha256:95e9d6861353d9fb3e13d5c0a4093a97ac620c874f9104d36c3f477842077cab"],"state_sha256":"7a06dd355acd312039f8116a5a561a50e6591580464d97628f2db4764f48f2f3"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"zuD34FG++xQytOeBTYSLw21gEP/1/Z0VbqzpidFt6NHfPtM+YAKRHfd6DkVyJc2Y6QNhTvH1QJUzzOqPGsHBAA==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-31T06:59:44.191143Z","bundle_sha256":"2fb40b7e70858c5556762cf38f7b18a758650819192a89f92b22cfb8fe0d4f1d"}}