{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:7NFM733K67POPV6KFZSVDDHNHS","short_pith_number":"pith:7NFM733K","canonical_record":{"source":{"id":"2602.23200","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2026-02-26T16:50:36Z","cross_cats_sorted":["cs.CL"],"title_canon_sha256":"bf071579f7edbe026a49975900bdd82cc6928c775277f70bb0a5250b7cae76ae","abstract_canon_sha256":"6dbd8fcb493ca7480b46baad824149ef4622096e507476ac9ee6cddd2dfce16f"},"schema_version":"1.0"},"canonical_sha256":"fb4acfef6af7dee7d7ca2e65518ced3cbbf7028efbfc68cfcef66329f00bead7","source":{"kind":"arxiv","id":"2602.23200","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2602.23200","created_at":"2026-05-22T01:03:18Z"},{"alias_kind":"arxiv_version","alias_value":"2602.23200v2","created_at":"2026-05-22T01:03:18Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2602.23200","created_at":"2026-05-22T01:03:18Z"},{"alias_kind":"pith_short_12","alias_value":"7NFM733K67PO","created_at":"2026-05-22T01:03:18Z"},{"alias_kind":"pith_short_16","alias_value":"7NFM733K67POPV6K","created_at":"2026-05-22T01:03:18Z"},{"alias_kind":"pith_short_8","alias_value":"7NFM733K","created_at":"2026-05-22T01:03:18Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:7NFM733K67POPV6KFZSVDDHNHS","target":"record","payload":{"canonical_record":{"source":{"id":"2602.23200","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2026-02-26T16:50:36Z","cross_cats_sorted":["cs.CL"],"title_canon_sha256":"bf071579f7edbe026a49975900bdd82cc6928c775277f70bb0a5250b7cae76ae","abstract_canon_sha256":"6dbd8fcb493ca7480b46baad824149ef4622096e507476ac9ee6cddd2dfce16f"},"schema_version":"1.0"},"canonical_sha256":"fb4acfef6af7dee7d7ca2e65518ced3cbbf7028efbfc68cfcef66329f00bead7","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-22T01:03:18.604612Z","signature_b64":"KnE4qe53cLTJdovuXxzVLW8qXR2iPe+Ju6xCd6nv/rkCqedvQsNLMll9KeJsSI9/eSl6qgWkjGopmXapNXeyBQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"fb4acfef6af7dee7d7ca2e65518ced3cbbf7028efbfc68cfcef66329f00bead7","last_reissued_at":"2026-05-22T01:03:18.603750Z","signature_status":"signed_v1","first_computed_at":"2026-05-22T01:03:18.603750Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2602.23200","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-22T01:03:18Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"c1w6zt3CSWY0BZBhdz0CjEhlwLz1oZ5c92HG0rNu1+S/T2x0BIdg8N0gc7awojDuen+3+ytEBMJ/LYtueyuFCg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-23T07:53:43.163509Z"},"content_sha256":"09e9bf17872d72a2cc3f6b6fe3279a4f7440dafa909c9d75259315b17ed830f1","schema_version":"1.0","event_id":"sha256:09e9bf17872d72a2cc3f6b6fe3279a4f7440dafa909c9d75259315b17ed830f1"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:7NFM733K67POPV6KFZSVDDHNHS","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"InnerQ: Hardware-Aware Tuning-Free Quantization of KV Cache for Large Language Models","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.CL"],"primary_cat":"cs.LG","authors_text":"Amir Ardakani, Sayed Mohammadreza Tayaranian Hosseini, Warren J. Gross","submitted_at":"2026-02-26T16:50:36Z","abstract_excerpt":"When transformer-based language models are deployed for text generation, most of the inference time is spent in the decoding stage, where output tokens are generated sequentially. Reducing the hardware cost of each decoding step is therefore critical for efficient long-context generation. A major bottleneck is the key-value (KV) cache, whose size grows with sequence length and often dominates the model's memory footprint. Prior work has proposed quantization methods to compress the KV cache while minimizing its loss of precision. We present InnerQ, a hardware-aware KV cache quantization scheme"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2602.23200","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2602.23200/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-22T01:03:18Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"evne4v9Ex4O1ONJDqghJf/pSwXaqxqJBY7whpOJBwID0NDXAf7VqK9boGCKG5WTpeuBAC8l0HAnHH9m1ZFhPAg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-23T07:53:43.164206Z"},"content_sha256":"ab4412bcef9282fe20ac47e5a2bcbb26968b9ebe5051320bf31162c4d943996e","schema_version":"1.0","event_id":"sha256:ab4412bcef9282fe20ac47e5a2bcbb26968b9ebe5051320bf31162c4d943996e"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/7NFM733K67POPV6KFZSVDDHNHS/bundle.json","state_url":"https://pith.science/pith/7NFM733K67POPV6KFZSVDDHNHS/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/7NFM733K67POPV6KFZSVDDHNHS/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-23T07:53:43Z","links":{"resolver":"https://pith.science/pith/7NFM733K67POPV6KFZSVDDHNHS","bundle":"https://pith.science/pith/7NFM733K67POPV6KFZSVDDHNHS/bundle.json","state":"https://pith.science/pith/7NFM733K67POPV6KFZSVDDHNHS/state.json","well_known_bundle":"https://pith.science/.well-known/pith/7NFM733K67POPV6KFZSVDDHNHS/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:7NFM733K67POPV6KFZSVDDHNHS","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"6dbd8fcb493ca7480b46baad824149ef4622096e507476ac9ee6cddd2dfce16f","cross_cats_sorted":["cs.CL"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2026-02-26T16:50:36Z","title_canon_sha256":"bf071579f7edbe026a49975900bdd82cc6928c775277f70bb0a5250b7cae76ae"},"schema_version":"1.0","source":{"id":"2602.23200","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2602.23200","created_at":"2026-05-22T01:03:18Z"},{"alias_kind":"arxiv_version","alias_value":"2602.23200v2","created_at":"2026-05-22T01:03:18Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2602.23200","created_at":"2026-05-22T01:03:18Z"},{"alias_kind":"pith_short_12","alias_value":"7NFM733K67PO","created_at":"2026-05-22T01:03:18Z"},{"alias_kind":"pith_short_16","alias_value":"7NFM733K67POPV6K","created_at":"2026-05-22T01:03:18Z"},{"alias_kind":"pith_short_8","alias_value":"7NFM733K","created_at":"2026-05-22T01:03:18Z"}],"graph_snapshots":[{"event_id":"sha256:ab4412bcef9282fe20ac47e5a2bcbb26968b9ebe5051320bf31162c4d943996e","target":"graph","created_at":"2026-05-22T01:03:18Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2602.23200/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"When transformer-based language models are deployed for text generation, most of the inference time is spent in the decoding stage, where output tokens are generated sequentially. Reducing the hardware cost of each decoding step is therefore critical for efficient long-context generation. A major bottleneck is the key-value (KV) cache, whose size grows with sequence length and often dominates the model's memory footprint. Prior work has proposed quantization methods to compress the KV cache while minimizing its loss of precision. We present InnerQ, a hardware-aware KV cache quantization scheme","authors_text":"Amir Ardakani, Sayed Mohammadreza Tayaranian Hosseini, Warren J. Gross","cross_cats":["cs.CL"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2026-02-26T16:50:36Z","title":"InnerQ: Hardware-Aware Tuning-Free Quantization of KV Cache for Large Language Models"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2602.23200","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:09e9bf17872d72a2cc3f6b6fe3279a4f7440dafa909c9d75259315b17ed830f1","target":"record","created_at":"2026-05-22T01:03:18Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"6dbd8fcb493ca7480b46baad824149ef4622096e507476ac9ee6cddd2dfce16f","cross_cats_sorted":["cs.CL"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2026-02-26T16:50:36Z","title_canon_sha256":"bf071579f7edbe026a49975900bdd82cc6928c775277f70bb0a5250b7cae76ae"},"schema_version":"1.0","source":{"id":"2602.23200","kind":"arxiv","version":2}},"canonical_sha256":"fb4acfef6af7dee7d7ca2e65518ced3cbbf7028efbfc68cfcef66329f00bead7","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"fb4acfef6af7dee7d7ca2e65518ced3cbbf7028efbfc68cfcef66329f00bead7","first_computed_at":"2026-05-22T01:03:18.603750Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-22T01:03:18.603750Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"KnE4qe53cLTJdovuXxzVLW8qXR2iPe+Ju6xCd6nv/rkCqedvQsNLMll9KeJsSI9/eSl6qgWkjGopmXapNXeyBQ==","signature_status":"signed_v1","signed_at":"2026-05-22T01:03:18.604612Z","signed_message":"canonical_sha256_bytes"},"source_id":"2602.23200","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:09e9bf17872d72a2cc3f6b6fe3279a4f7440dafa909c9d75259315b17ed830f1","sha256:ab4412bcef9282fe20ac47e5a2bcbb26968b9ebe5051320bf31162c4d943996e"],"state_sha256":"caa03263106f2749f396d0d991bc24829183a99430688954fb1d791620db5a01"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"7AWZJq2jzsvrAjucxLa50oKEwv3BJ3E3HdYnfHJmMo39ThnDKYzqkXDqFPG9ECmjDL8xtU6N1mTEGZlU4YtlBA==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-23T07:53:43.167986Z","bundle_sha256":"2ad883733825c4fff87b2050a6d36d87e1a87e7d7e57b62250107967bc2fad5d"}}