{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:ZW7EPVPMPWXCIKVVDYLGAOW7AF","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"ef7d15d1c85b772b12482d2eac7e4a4fb8110381d6e89bf553e2b4cd7e9cd9b6","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-05-13T11:20:52Z","title_canon_sha256":"fa1e1725ee3ae24693c11dfbabdb77c0a6072cb5c06c8b2bdefb8a42cd853285"},"schema_version":"1.0","source":{"id":"2605.13360","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.13360","created_at":"2026-05-18T02:44:48Z"},{"alias_kind":"arxiv_version","alias_value":"2605.13360v2","created_at":"2026-05-18T02:44:48Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.13360","created_at":"2026-05-18T02:44:48Z"},{"alias_kind":"pith_short_12","alias_value":"ZW7EPVPMPWXC","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_16","alias_value":"ZW7EPVPMPWXCIKVV","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_8","alias_value":"ZW7EPVPM","created_at":"2026-05-18T12:33:37Z"}],"graph_snapshots":[{"event_id":"sha256:eea37515a3aa323a19408d7aa0b0ff5cb8106c507f948b8f9c99af7958ab9476","target":"graph","created_at":"2026-05-18T02:44:48Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"For strong cloud models, our method can be applied out-of-the-box to existing real-time cloud APIs, providing 1.3-1.7× speedups with minor accuracy loss. ... Altogether, this approach provides 1.6-2.2× speedups with the Qwen2.5-3B-Instruct and Llama-3.2-3B-Instruct models across multiple tool calling benchmarks."},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"That speculative tool calling incurs only minor accuracy loss and that the clock-based training on synthetic data generalizes to real user interactions without introducing errors from premature tool calls."},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"Speculative Interaction Agents achieve 1.3-2.2x speedups for real-time tool-calling agents via async I/O decoupling and speculative calls, with clock-based training for small edge models."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"Speculative Interaction Agents reduce real-time tool-calling latency by overlapping external waits with reasoning and executing tools on partial information."}],"snapshot_sha256":"98b04b67eaa8b96b84f91b01b856fe89b7149ee6416c0f8a47ac4c51f50a5fd6"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"There is a growing demand for agentic AI technologies for a range of downstream applications like customer service and personal assistants. For applications where the agent needs to interact with a person, real-time low-latency responsiveness is required; for example, with voice-controlled applications, under 1 second of latency is typically required for the interaction to feel seamless. However, if we want the LLM to reason and execute an agentic workflow with tool calling, this can add several seconds or more of latency, which is prohibitive for real-time latency-sensitive applications. In o","authors_text":"Amir Gholami, Coleman Hooper, Eric Wen, John Wawrzynek, Kurt Keutzer, Michael W. Mahoney, Minwoo Kang, Nicholas Lee, Suhong Moon, Yakun Sophia Shao","cross_cats":[],"headline":"Speculative Interaction Agents reduce real-time tool-calling latency by overlapping external waits with reasoning and executing tools on partial information.","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-05-13T11:20:52Z","title":"Speculative Interaction Agents: Building Real-Time Agents with Asynchronous I/O and Speculative Tool Calling"},"references":{"count":17,"internal_anchors":5,"resolved_work":17,"sample":[{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":1,"title":"Speakrl: Synergizing reasoning, speaking, and act- ing in language models with reinforcement learning.arXiv preprint arXiv:2512.13159,","work_id":"84f8d804-cbc6-455d-a2ea-6f417e9370eb","year":null},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":2,"title":"Stream rag: Instant and accurate spoken dialogue systems with streaming tool usage.arXiv preprint arXiv:2510.02044,","work_id":"c5cfd1a6-97b4-4d0a-b87e-dc513dd69a50","year":null},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":3,"title":"Whisperx: Time-accurate speech transcription of long-form audio.INTERSPEECH 2023,","work_id":"02bd7860-cacd-47a7-9446-fdd8d6c44e65","year":2023},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":4,"title":"Alexandre Défossez, Laurent Mazaré, Manu Orsini, Amélie Royer, Patrick Pérez, Hervé Jégou, Edouard Grave, and Neil Zeghidour","work_id":"3f7fb0be-0054-4224-a50f-f45e5c86cbc9","year":null},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":5,"title":"E., Lee, N., Jha, S., Kim, S., Tabrizi, R., Moon, S., Hooper, C., Anumanchipalli, G., Keutzer, K., and Gholami, A","work_id":"cb7a8776-c507-490d-bef3-1b3be9936552","year":null}],"snapshot_sha256":"95211074588445fe751c3abd4356f3ba3cb82e858f8d7bd0945454e8e2c0f862"},"source":{"id":"2605.13360","kind":"arxiv","version":2},"verdict":{"created_at":"2026-05-15T05:23:09.803033Z","id":"7417565f-0c8a-4066-bc87-a7fa666bf61b","model_set":{"reader":"grok-4.3"},"one_line_summary":"Speculative Interaction Agents achieve 1.3-2.2x speedups for real-time tool-calling agents via async I/O decoupling and speculative calls, with clock-based training for small edge models.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"Speculative Interaction Agents reduce real-time tool-calling latency by overlapping external waits with reasoning and executing tools on partial information.","strongest_claim":"For strong cloud models, our method can be applied out-of-the-box to existing real-time cloud APIs, providing 1.3-1.7× speedups with minor accuracy loss. ... Altogether, this approach provides 1.6-2.2× speedups with the Qwen2.5-3B-Instruct and Llama-3.2-3B-Instruct models across multiple tool calling benchmarks.","weakest_assumption":"That speculative tool calling incurs only minor accuracy loss and that the clock-based training on synthetic data generalizes to real user interactions without introducing errors from premature tool calls."}},"verdict_id":"7417565f-0c8a-4066-bc87-a7fa666bf61b"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:e8b717ba06869333ba6196f491bbdbf9939a71ed7ba76d9ba8fb9dfbf2d98f14","target":"record","created_at":"2026-05-18T02:44:48Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"ef7d15d1c85b772b12482d2eac7e4a4fb8110381d6e89bf553e2b4cd7e9cd9b6","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-05-13T11:20:52Z","title_canon_sha256":"fa1e1725ee3ae24693c11dfbabdb77c0a6072cb5c06c8b2bdefb8a42cd853285"},"schema_version":"1.0","source":{"id":"2605.13360","kind":"arxiv","version":2}},"canonical_sha256":"cdbe47d5ec7dae242ab51e16603adf016f94b41cba7193b9662e67420dd513a9","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"cdbe47d5ec7dae242ab51e16603adf016f94b41cba7193b9662e67420dd513a9","first_computed_at":"2026-05-18T02:44:48.166571Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T02:44:48.166571Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"d6rWz4CGYwbp7Nxpq2GeGZMG/KVEQ9rizRyjtO0uMKkcJXzUPwyyBN2k8lcwWSG2BABw0J5vr+QjmNYvL8U8CA==","signature_status":"signed_v1","signed_at":"2026-05-18T02:44:48.166986Z","signed_message":"canonical_sha256_bytes"},"source_id":"2605.13360","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:e8b717ba06869333ba6196f491bbdbf9939a71ed7ba76d9ba8fb9dfbf2d98f14","sha256:eea37515a3aa323a19408d7aa0b0ff5cb8106c507f948b8f9c99af7958ab9476"],"state_sha256":"a7dbba05d37bf661878f1cc2174d5f22f683ec0fb384bfe17365ad493251ab0e"}