{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:CTZZCS5M2KBRYAV5G645QMVADE","short_pith_number":"pith:CTZZCS5M","schema_version":"1.0","canonical_sha256":"14f3914bacd2831c02bd37b9d832a0193205228750734d74b17efd069d3638bd","source":{"kind":"arxiv","id":"2605.17757","version":1},"attestation_state":"computed","paper":{"title":"OSCAR: Offline Spectral Covariance-Aware Rotation for 2-bit KV Cache Quantization","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.AI","cs.DC","cs.PF"],"primary_cat":"cs.LG","authors_text":"Ben Athiwaratkun, Donglin Zhuang, Jisen Li, Shuaiwen Leon Song, Xiaoxia Wu, Zhongzhu Zhou, Ziyan Chen","submitted_at":"2026-05-18T02:24:29Z","abstract_excerpt":"INT2 KV-cache quantization is attractive for long-context LLM serving, but it remains difficult to make both accurate and deployable. Simple rotations such as Hadamard transforms reduce outliers, but still degrade at INT2 because they are not aligned with downstream attention. We propose OSCAR, an Ultra-low-bit KV Cache quantization method that estimates attention-aware covariance structures offline and uses them to derive fixed rotations and clipping thresholds for quantization. In this way, it aligns KV quantization with the covariance structures that attention actually consumes. More import"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2605.17757","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-05-18T02:24:29Z","cross_cats_sorted":["cs.AI","cs.DC","cs.PF"],"title_canon_sha256":"decb1ba71b854cfca8bb143f4a05024b3ceac34d741a5117f2b4171dd5f216bd","abstract_canon_sha256":"7166df38a1b18134501298df1f128066ef1097460ac490402d4f0135401367b3"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-20T00:04:56.672013Z","signature_b64":"tHj/norS51PxYY9y0odcUmMEPp2CzNJMzrPR/2PgfElnJuK5roDlo4OHEWTT/0vw4f2SfC7ZLYnoJU72/ig+Bg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"14f3914bacd2831c02bd37b9d832a0193205228750734d74b17efd069d3638bd","last_reissued_at":"2026-05-20T00:04:56.671175Z","signature_status":"signed_v1","first_computed_at":"2026-05-20T00:04:56.671175Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"OSCAR: Offline Spectral Covariance-Aware Rotation for 2-bit KV Cache Quantization","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.AI","cs.DC","cs.PF"],"primary_cat":"cs.LG","authors_text":"Ben Athiwaratkun, Donglin Zhuang, Jisen Li, Shuaiwen Leon Song, Xiaoxia Wu, Zhongzhu Zhou, Ziyan Chen","submitted_at":"2026-05-18T02:24:29Z","abstract_excerpt":"INT2 KV-cache quantization is attractive for long-context LLM serving, but it remains difficult to make both accurate and deployable. Simple rotations such as Hadamard transforms reduce outliers, but still degrade at INT2 because they are not aligned with downstream attention. We propose OSCAR, an Ultra-low-bit KV Cache quantization method that estimates attention-aware covariance structures offline and uses them to derive fixed rotations and clipping thresholds for quantization. In this way, it aligns KV quantization with the covariance structures that attention actually consumes. More import"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.17757","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2605.17757/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2605.17757","created_at":"2026-05-20T00:04:56.671326+00:00"},{"alias_kind":"arxiv_version","alias_value":"2605.17757v1","created_at":"2026-05-20T00:04:56.671326+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.17757","created_at":"2026-05-20T00:04:56.671326+00:00"},{"alias_kind":"pith_short_12","alias_value":"CTZZCS5M2KBR","created_at":"2026-05-20T00:04:56.671326+00:00"},{"alias_kind":"pith_short_16","alias_value":"CTZZCS5M2KBRYAV5","created_at":"2026-05-20T00:04:56.671326+00:00"},{"alias_kind":"pith_short_8","alias_value":"CTZZCS5M","created_at":"2026-05-20T00:04:56.671326+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/CTZZCS5M2KBRYAV5G645QMVADE","json":"https://pith.science/pith/CTZZCS5M2KBRYAV5G645QMVADE.json","graph_json":"https://pith.science/api/pith-number/CTZZCS5M2KBRYAV5G645QMVADE/graph.json","events_json":"https://pith.science/api/pith-number/CTZZCS5M2KBRYAV5G645QMVADE/events.json","paper":"https://pith.science/paper/CTZZCS5M"},"agent_actions":{"view_html":"https://pith.science/pith/CTZZCS5M2KBRYAV5G645QMVADE","download_json":"https://pith.science/pith/CTZZCS5M2KBRYAV5G645QMVADE.json","view_paper":"https://pith.science/paper/CTZZCS5M","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2605.17757&json=true","fetch_graph":"https://pith.science/api/pith-number/CTZZCS5M2KBRYAV5G645QMVADE/graph.json","fetch_events":"https://pith.science/api/pith-number/CTZZCS5M2KBRYAV5G645QMVADE/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/CTZZCS5M2KBRYAV5G645QMVADE/action/timestamp_anchor","attest_storage":"https://pith.science/pith/CTZZCS5M2KBRYAV5G645QMVADE/action/storage_attestation","attest_author":"https://pith.science/pith/CTZZCS5M2KBRYAV5G645QMVADE/action/author_attestation","sign_citation":"https://pith.science/pith/CTZZCS5M2KBRYAV5G645QMVADE/action/citation_signature","submit_replication":"https://pith.science/pith/CTZZCS5M2KBRYAV5G645QMVADE/action/replication_record"}},"created_at":"2026-05-20T00:04:56.671326+00:00","updated_at":"2026-05-20T00:04:56.671326+00:00"}