{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2025:U4UJ2ELZGA3OUGA3DULMXA67TX","short_pith_number":"pith:U4UJ2ELZ","schema_version":"1.0","canonical_sha256":"a7289d11793036ea181b1d16cb83df9de5baede13fda811e8cb878f36dc018ed","source":{"kind":"arxiv","id":"2510.00231","version":2},"attestation_state":"computed","paper":{"title":"The Pitfalls of KV Cache Compression","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.LG","authors_text":"Aditya Grover, Alex Chen, Daniel Israel, Guy Van den Broeck, Renato Geh","submitted_at":"2025-09-30T19:55:26Z","abstract_excerpt":"KV cache compression promises increased throughput and efficiency with negligible loss in performance. While the gains in throughput are indisputable and recent literature has indeed shown minimal degradation on particular benchmarks, in general the consequences of compression in realistic scenarios such as multi-instruction prompting have been insufficiently studied. In this paper, we identify several pitfalls that practitioners should be aware of when deploying KV cache compressed LLMs. We evaluate five KV cache compression methods (StreamingLLM, SnapKV, TOVA, H2O, and K-Norm) on Llama3.1 8B"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2510.00231","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2025-09-30T19:55:26Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"c601479fdd1965425b3cd2e4d82322594688318a3a822b96cbd93dec6a34f2eb","abstract_canon_sha256":"81a4a0ed10bfaa7c53b671a465caeeb0838cea35ddb320dc389b908cc99f8f87"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:39:17.383585Z","signature_b64":"Rvu9FTJcywWhrUfnqRy0E8kyl1gJb7sS40QKWlPV5kpw6o6V5a4EC2Z/Pjl3YmCUXzWXmuPILxDXpH8uAcudCg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"a7289d11793036ea181b1d16cb83df9de5baede13fda811e8cb878f36dc018ed","last_reissued_at":"2026-05-17T23:39:17.382808Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:39:17.382808Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"The Pitfalls of KV Cache Compression","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.LG","authors_text":"Aditya Grover, Alex Chen, Daniel Israel, Guy Van den Broeck, Renato Geh","submitted_at":"2025-09-30T19:55:26Z","abstract_excerpt":"KV cache compression promises increased throughput and efficiency with negligible loss in performance. While the gains in throughput are indisputable and recent literature has indeed shown minimal degradation on particular benchmarks, in general the consequences of compression in realistic scenarios such as multi-instruction prompting have been insufficiently studied. In this paper, we identify several pitfalls that practitioners should be aware of when deploying KV cache compressed LLMs. We evaluate five KV cache compression methods (StreamingLLM, SnapKV, TOVA, H2O, and K-Norm) on Llama3.1 8B"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2510.00231","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2510.00231","created_at":"2026-05-17T23:39:17.382907+00:00"},{"alias_kind":"arxiv_version","alias_value":"2510.00231v2","created_at":"2026-05-17T23:39:17.382907+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2510.00231","created_at":"2026-05-17T23:39:17.382907+00:00"},{"alias_kind":"pith_short_12","alias_value":"U4UJ2ELZGA3O","created_at":"2026-05-18T12:33:37.589309+00:00"},{"alias_kind":"pith_short_16","alias_value":"U4UJ2ELZGA3OUGA3","created_at":"2026-05-18T12:33:37.589309+00:00"},{"alias_kind":"pith_short_8","alias_value":"U4UJ2ELZ","created_at":"2026-05-18T12:33:37.589309+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":6,"internal_anchor_count":6,"sample":[{"citing_arxiv_id":"2604.08426","citing_title":"KV Cache Offloading for Context-Intensive Tasks","ref_index":7,"is_internal_anchor":true},{"citing_arxiv_id":"2603.09002","citing_title":"Security Considerations for Multi-agent Systems","ref_index":225,"is_internal_anchor":true},{"citing_arxiv_id":"2604.08426","citing_title":"KV Cache Offloading for Context-Intensive Tasks","ref_index":7,"is_internal_anchor":true},{"citing_arxiv_id":"2605.08234","citing_title":"When Does Value-Aware KV Eviction Help? A Fixed-Contract Diagnostic for Non-Monotone Cache Compression","ref_index":5,"is_internal_anchor":true},{"citing_arxiv_id":"2604.08426","citing_title":"KV Cache Offloading for Context-Intensive Tasks","ref_index":7,"is_internal_anchor":true},{"citing_arxiv_id":"2604.08426","citing_title":"KV Cache Offloading for Context-Intensive Tasks","ref_index":7,"is_internal_anchor":true}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/U4UJ2ELZGA3OUGA3DULMXA67TX","json":"https://pith.science/pith/U4UJ2ELZGA3OUGA3DULMXA67TX.json","graph_json":"https://pith.science/api/pith-number/U4UJ2ELZGA3OUGA3DULMXA67TX/graph.json","events_json":"https://pith.science/api/pith-number/U4UJ2ELZGA3OUGA3DULMXA67TX/events.json","paper":"https://pith.science/paper/U4UJ2ELZ"},"agent_actions":{"view_html":"https://pith.science/pith/U4UJ2ELZGA3OUGA3DULMXA67TX","download_json":"https://pith.science/pith/U4UJ2ELZGA3OUGA3DULMXA67TX.json","view_paper":"https://pith.science/paper/U4UJ2ELZ","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2510.00231&json=true","fetch_graph":"https://pith.science/api/pith-number/U4UJ2ELZGA3OUGA3DULMXA67TX/graph.json","fetch_events":"https://pith.science/api/pith-number/U4UJ2ELZGA3OUGA3DULMXA67TX/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/U4UJ2ELZGA3OUGA3DULMXA67TX/action/timestamp_anchor","attest_storage":"https://pith.science/pith/U4UJ2ELZGA3OUGA3DULMXA67TX/action/storage_attestation","attest_author":"https://pith.science/pith/U4UJ2ELZGA3OUGA3DULMXA67TX/action/author_attestation","sign_citation":"https://pith.science/pith/U4UJ2ELZGA3OUGA3DULMXA67TX/action/citation_signature","submit_replication":"https://pith.science/pith/U4UJ2ELZGA3OUGA3DULMXA67TX/action/replication_record"}},"created_at":"2026-05-17T23:39:17.382907+00:00","updated_at":"2026-05-17T23:39:17.382907+00:00"}