{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:KSEEQ52XHJJ6D766ONRYIVVEOU","short_pith_number":"pith:KSEEQ52X","schema_version":"1.0","canonical_sha256":"54884877573a53e1ffde73638456a47502275e771349cad1ab38b4c8fe3457c9","source":{"kind":"arxiv","id":"2603.18897","version":3},"attestation_state":"computed","paper":{"title":"Parallelizing Tool Execution and LLM Generation for Low-Latency Agent Serving","license":"http://creativecommons.org/licenses/by-nc-nd/4.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.DC","authors_text":"Han Zhao, Hao Wang, Jianxun Li, Kai Chen, Kaiqiang Xu, Rui Ma, Yifan Sui, Yuqing Yang, Zhiyuan He","submitted_at":"2026-03-19T13:36:50Z","abstract_excerpt":"LLM-powered agents execute tasks through a sequential loop of model generation and tool execution. Today's serving systems serialize this loop, leaving tool latency exposed on the task critical path. This paper presents PASTE, a tool-aware agent-serving system that predicts concrete future tool invocations from recurring agent patterns and executes them speculatively while the LLM is still generating. PASTE isolates speculative results until confirmed by the LLM and jointly schedules tool execution and returning LLM sessions to avoid shifting bottlenecks to the GPU. Across deep research, codin"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2603.18897","kind":"arxiv","version":3},"metadata":{"license":"http://creativecommons.org/licenses/by-nc-nd/4.0/","primary_cat":"cs.DC","submitted_at":"2026-03-19T13:36:50Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"ea0f1818be5958c62cea495b38dd5a765877e5a4fb7786acff1758c93009628e","abstract_canon_sha256":"2d51d0d8391c6b1ee3b773985f9fd2f107ed05f8b849157dd7f494e55552cd7a"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-19T16:09:56.973370Z","signature_b64":"2+vDSbm/cZYetXHpUGKiYszLDM8b4E7pl7yh6PNzVsy2Oyzs8eaF0IJA+bv01zOJYzU7HqDd0HqCd4ydrAOTBg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"54884877573a53e1ffde73638456a47502275e771349cad1ab38b4c8fe3457c9","last_reissued_at":"2026-06-19T16:09:56.973011Z","signature_status":"signed_v1","first_computed_at":"2026-06-19T16:09:56.973011Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Parallelizing Tool Execution and LLM Generation for Low-Latency Agent Serving","license":"http://creativecommons.org/licenses/by-nc-nd/4.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.DC","authors_text":"Han Zhao, Hao Wang, Jianxun Li, Kai Chen, Kaiqiang Xu, Rui Ma, Yifan Sui, Yuqing Yang, Zhiyuan He","submitted_at":"2026-03-19T13:36:50Z","abstract_excerpt":"LLM-powered agents execute tasks through a sequential loop of model generation and tool execution. Today's serving systems serialize this loop, leaving tool latency exposed on the task critical path. This paper presents PASTE, a tool-aware agent-serving system that predicts concrete future tool invocations from recurring agent patterns and executes them speculatively while the LLM is still generating. PASTE isolates speculative results until confirmed by the LLM and jointly schedules tool execution and returning LLM sessions to avoid shifting bottlenecks to the GPU. Across deep research, codin"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2603.18897","kind":"arxiv","version":3},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2603.18897/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2603.18897","created_at":"2026-06-19T16:09:56.973069+00:00"},{"alias_kind":"arxiv_version","alias_value":"2603.18897v3","created_at":"2026-06-19T16:09:56.973069+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2603.18897","created_at":"2026-06-19T16:09:56.973069+00:00"},{"alias_kind":"pith_short_12","alias_value":"KSEEQ52XHJJ6","created_at":"2026-06-19T16:09:56.973069+00:00"},{"alias_kind":"pith_short_16","alias_value":"KSEEQ52XHJJ6D766","created_at":"2026-06-19T16:09:56.973069+00:00"},{"alias_kind":"pith_short_8","alias_value":"KSEEQ52X","created_at":"2026-06-19T16:09:56.973069+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":8,"internal_anchor_count":8,"sample":[{"citing_arxiv_id":"2605.27744","citing_title":"A Policy-Driven Runtime Layer for Agentic LLM Serving","ref_index":29,"is_internal_anchor":true},{"citing_arxiv_id":"2606.02483","citing_title":"Ghost Tool Calls: Issue-Time Privacy for Speculative Agent Tools","ref_index":2,"is_internal_anchor":true},{"citing_arxiv_id":"2605.21965","citing_title":"SpecHop: Continuous Speculation for Accelerating Multi-Hop Retrieval Agents","ref_index":21,"is_internal_anchor":true},{"citing_arxiv_id":"2605.06472","citing_title":"Efficient Serving for Dynamic Agent Workflows with Prediction-based KV-Cache Management","ref_index":12,"is_internal_anchor":true},{"citing_arxiv_id":"2604.20938","citing_title":"HARBOR: Automated Harness Optimization","ref_index":29,"is_internal_anchor":true},{"citing_arxiv_id":"2604.14228","citing_title":"Dive into Claude Code: The Design Space of Today's and Future AI Agent Systems","ref_index":44,"is_internal_anchor":true},{"citing_arxiv_id":"2604.16469","citing_title":"B-PASTE: Beam-Aware Pattern-Guided Speculative Execution for Resource-Constrained LLM Agents","ref_index":1,"is_internal_anchor":true},{"citing_arxiv_id":"2604.08224","citing_title":"Externalization in LLM Agents: A Unified Review of Memory, Skills, Protocols and Harness Engineering","ref_index":132,"is_internal_anchor":true}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/KSEEQ52XHJJ6D766ONRYIVVEOU","json":"https://pith.science/pith/KSEEQ52XHJJ6D766ONRYIVVEOU.json","graph_json":"https://pith.science/api/pith-number/KSEEQ52XHJJ6D766ONRYIVVEOU/graph.json","events_json":"https://pith.science/api/pith-number/KSEEQ52XHJJ6D766ONRYIVVEOU/events.json","paper":"https://pith.science/paper/KSEEQ52X"},"agent_actions":{"view_html":"https://pith.science/pith/KSEEQ52XHJJ6D766ONRYIVVEOU","download_json":"https://pith.science/pith/KSEEQ52XHJJ6D766ONRYIVVEOU.json","view_paper":"https://pith.science/paper/KSEEQ52X","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2603.18897&json=true","fetch_graph":"https://pith.science/api/pith-number/KSEEQ52XHJJ6D766ONRYIVVEOU/graph.json","fetch_events":"https://pith.science/api/pith-number/KSEEQ52XHJJ6D766ONRYIVVEOU/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/KSEEQ52XHJJ6D766ONRYIVVEOU/action/timestamp_anchor","attest_storage":"https://pith.science/pith/KSEEQ52XHJJ6D766ONRYIVVEOU/action/storage_attestation","attest_author":"https://pith.science/pith/KSEEQ52XHJJ6D766ONRYIVVEOU/action/author_attestation","sign_citation":"https://pith.science/pith/KSEEQ52XHJJ6D766ONRYIVVEOU/action/citation_signature","submit_replication":"https://pith.science/pith/KSEEQ52XHJJ6D766ONRYIVVEOU/action/replication_record"}},"created_at":"2026-06-19T16:09:56.973069+00:00","updated_at":"2026-06-19T16:09:56.973069+00:00"}