{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:BK57NKGZQ3Y3W4F4V2QFTJPWX5","short_pith_number":"pith:BK57NKGZ","schema_version":"1.0","canonical_sha256":"0abbf6a8d986f1bb70bcaea059a5f6bf5eed2eec93ff71d2e790316205d2560e","source":{"kind":"arxiv","id":"2606.30389","version":1},"attestation_state":"computed","paper":{"title":"Predict, Reuse, and Repair: Accelerating Dynamic Sparse Attention for Long-Context LLM Decoding","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.LG","authors_text":"Aditya Dhakal, Dejan Milojicic, Gourav Rattihalli, Junbo Li, Longfei Shangguan, Tianyu Wang, Zhiwei Ren","submitted_at":"2026-06-29T14:43:25Z","abstract_excerpt":"Dynamic sparse attention (DSA) accelerates long-context LLM decoding by attending to only the top-K KV blocks relevant to each query, but it introduces a serialized selection-to-attention dependency that emerges as a new latency bottleneck. We present PRR, a speculate-reuse-repair runtime that exploits temporal locality in DSA selections to predict likely blocks, speculate the attention over them while selection is in flight, and incrementally repair missed blocks once the true selected set is known. PRR uses a lightweight EMA-based predictor, a profiling-guided speculation budget that keeps s"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2606.30389","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2026-06-29T14:43:25Z","cross_cats_sorted":[],"title_canon_sha256":"310ab58e7bd31b1f88962ce132d552333d44e83ea283fd337447205a49e6d733","abstract_canon_sha256":"4f65057ff97ccbf6f6cdbd7af9221e21165e73b66c02fe5fb5f4b402c1552d91"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-30T02:18:13.062764Z","signature_b64":"C6viv9ElcJJhariffvYJE6pr9cTPRMDCz0Sd481oKEetI0WroMmZ/N2uU/ihg5yxSZKk1ix5WHZrKzzO5nQiBw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"0abbf6a8d986f1bb70bcaea059a5f6bf5eed2eec93ff71d2e790316205d2560e","last_reissued_at":"2026-06-30T02:18:13.062279Z","signature_status":"signed_v1","first_computed_at":"2026-06-30T02:18:13.062279Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Predict, Reuse, and Repair: Accelerating Dynamic Sparse Attention for Long-Context LLM Decoding","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.LG","authors_text":"Aditya Dhakal, Dejan Milojicic, Gourav Rattihalli, Junbo Li, Longfei Shangguan, Tianyu Wang, Zhiwei Ren","submitted_at":"2026-06-29T14:43:25Z","abstract_excerpt":"Dynamic sparse attention (DSA) accelerates long-context LLM decoding by attending to only the top-K KV blocks relevant to each query, but it introduces a serialized selection-to-attention dependency that emerges as a new latency bottleneck. We present PRR, a speculate-reuse-repair runtime that exploits temporal locality in DSA selections to predict likely blocks, speculate the attention over them while selection is in flight, and incrementally repair missed blocks once the true selected set is known. PRR uses a lightweight EMA-based predictor, a profiling-guided speculation budget that keeps s"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.30389","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2606.30389/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2606.30389","created_at":"2026-06-30T02:18:13.062354+00:00"},{"alias_kind":"arxiv_version","alias_value":"2606.30389v1","created_at":"2026-06-30T02:18:13.062354+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.30389","created_at":"2026-06-30T02:18:13.062354+00:00"},{"alias_kind":"pith_short_12","alias_value":"BK57NKGZQ3Y3","created_at":"2026-06-30T02:18:13.062354+00:00"},{"alias_kind":"pith_short_16","alias_value":"BK57NKGZQ3Y3W4F4","created_at":"2026-06-30T02:18:13.062354+00:00"},{"alias_kind":"pith_short_8","alias_value":"BK57NKGZ","created_at":"2026-06-30T02:18:13.062354+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/BK57NKGZQ3Y3W4F4V2QFTJPWX5","json":"https://pith.science/pith/BK57NKGZQ3Y3W4F4V2QFTJPWX5.json","graph_json":"https://pith.science/api/pith-number/BK57NKGZQ3Y3W4F4V2QFTJPWX5/graph.json","events_json":"https://pith.science/api/pith-number/BK57NKGZQ3Y3W4F4V2QFTJPWX5/events.json","paper":"https://pith.science/paper/BK57NKGZ"},"agent_actions":{"view_html":"https://pith.science/pith/BK57NKGZQ3Y3W4F4V2QFTJPWX5","download_json":"https://pith.science/pith/BK57NKGZQ3Y3W4F4V2QFTJPWX5.json","view_paper":"https://pith.science/paper/BK57NKGZ","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2606.30389&json=true","fetch_graph":"https://pith.science/api/pith-number/BK57NKGZQ3Y3W4F4V2QFTJPWX5/graph.json","fetch_events":"https://pith.science/api/pith-number/BK57NKGZQ3Y3W4F4V2QFTJPWX5/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/BK57NKGZQ3Y3W4F4V2QFTJPWX5/action/timestamp_anchor","attest_storage":"https://pith.science/pith/BK57NKGZQ3Y3W4F4V2QFTJPWX5/action/storage_attestation","attest_author":"https://pith.science/pith/BK57NKGZQ3Y3W4F4V2QFTJPWX5/action/author_attestation","sign_citation":"https://pith.science/pith/BK57NKGZQ3Y3W4F4V2QFTJPWX5/action/citation_signature","submit_replication":"https://pith.science/pith/BK57NKGZQ3Y3W4F4V2QFTJPWX5/action/replication_record"}},"created_at":"2026-06-30T02:18:13.062354+00:00","updated_at":"2026-06-30T02:18:13.062354+00:00"}