{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:TZNMFFYDLOH2XSSKEOLSMO6FDQ","short_pith_number":"pith:TZNMFFYD","schema_version":"1.0","canonical_sha256":"9e5ac297035b8fabca4a2397263bc51c15b5d1212c1dea04bce73a41365af123","source":{"kind":"arxiv","id":"2606.00866","version":1},"attestation_state":"computed","paper":{"title":"Idleness is Relative: Exploiting Tool-Call Idle Windows for Offloading in Agentic Systems with MORI","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.OS","authors_text":"Hanchen Li, Hao Kang, Ion Stoica, Tian Xia, Xiaokun Chen, Yifan Qiao, Yi Xu, Zhifei Li","submitted_at":"2026-05-30T19:44:25Z","abstract_excerpt":"Modern LLM serving systems increasingly host agentic workloads, whose sessions issue tens of model invocations interleaved with tool calls, accumulating KV cache that can be reused across steps. As requests' total KV cache size easily exceeds GPU HBM capacity, researchers offload them to CPU DRAM. However, tool-call durations span orders of magnitude, and the cost of transferring KV cache between tiers makes it impractical to re-place entries on every call. We observe that agentic programs exhibit a two-phase structure: busy phases of rapid short tool calls and idle phases dominated by long-ru"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2606.00866","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.OS","submitted_at":"2026-05-30T19:44:25Z","cross_cats_sorted":[],"title_canon_sha256":"61960e0c81613abaad1edab8fb4847ecb97f48de84384f0569db53e58ae612a2","abstract_canon_sha256":"3dfcc2ce509f9ce2dd1f61b487638b4eeb71809110aa9ed99a6a870437792546"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-02T01:04:08.308768Z","signature_b64":"g3ok5KdFreKTtw0taEqdDd2auNPiSSXRYtDWj9MfBhHJAwIIB8B1q8j92v3w3PkDtJsS29zUvwNVu0ummZsxDw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"9e5ac297035b8fabca4a2397263bc51c15b5d1212c1dea04bce73a41365af123","last_reissued_at":"2026-06-02T01:04:08.308432Z","signature_status":"signed_v1","first_computed_at":"2026-06-02T01:04:08.308432Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Idleness is Relative: Exploiting Tool-Call Idle Windows for Offloading in Agentic Systems with MORI","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.OS","authors_text":"Hanchen Li, Hao Kang, Ion Stoica, Tian Xia, Xiaokun Chen, Yifan Qiao, Yi Xu, Zhifei Li","submitted_at":"2026-05-30T19:44:25Z","abstract_excerpt":"Modern LLM serving systems increasingly host agentic workloads, whose sessions issue tens of model invocations interleaved with tool calls, accumulating KV cache that can be reused across steps. As requests' total KV cache size easily exceeds GPU HBM capacity, researchers offload them to CPU DRAM. However, tool-call durations span orders of magnitude, and the cost of transferring KV cache between tiers makes it impractical to re-place entries on every call. We observe that agentic programs exhibit a two-phase structure: busy phases of rapid short tool calls and idle phases dominated by long-ru"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.00866","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2606.00866/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2606.00866","created_at":"2026-06-02T01:04:08.308482+00:00"},{"alias_kind":"arxiv_version","alias_value":"2606.00866v1","created_at":"2026-06-02T01:04:08.308482+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.00866","created_at":"2026-06-02T01:04:08.308482+00:00"},{"alias_kind":"pith_short_12","alias_value":"TZNMFFYDLOH2","created_at":"2026-06-02T01:04:08.308482+00:00"},{"alias_kind":"pith_short_16","alias_value":"TZNMFFYDLOH2XSSK","created_at":"2026-06-02T01:04:08.308482+00:00"},{"alias_kind":"pith_short_8","alias_value":"TZNMFFYD","created_at":"2026-06-02T01:04:08.308482+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/TZNMFFYDLOH2XSSKEOLSMO6FDQ","json":"https://pith.science/pith/TZNMFFYDLOH2XSSKEOLSMO6FDQ.json","graph_json":"https://pith.science/api/pith-number/TZNMFFYDLOH2XSSKEOLSMO6FDQ/graph.json","events_json":"https://pith.science/api/pith-number/TZNMFFYDLOH2XSSKEOLSMO6FDQ/events.json","paper":"https://pith.science/paper/TZNMFFYD"},"agent_actions":{"view_html":"https://pith.science/pith/TZNMFFYDLOH2XSSKEOLSMO6FDQ","download_json":"https://pith.science/pith/TZNMFFYDLOH2XSSKEOLSMO6FDQ.json","view_paper":"https://pith.science/paper/TZNMFFYD","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2606.00866&json=true","fetch_graph":"https://pith.science/api/pith-number/TZNMFFYDLOH2XSSKEOLSMO6FDQ/graph.json","fetch_events":"https://pith.science/api/pith-number/TZNMFFYDLOH2XSSKEOLSMO6FDQ/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/TZNMFFYDLOH2XSSKEOLSMO6FDQ/action/timestamp_anchor","attest_storage":"https://pith.science/pith/TZNMFFYDLOH2XSSKEOLSMO6FDQ/action/storage_attestation","attest_author":"https://pith.science/pith/TZNMFFYDLOH2XSSKEOLSMO6FDQ/action/author_attestation","sign_citation":"https://pith.science/pith/TZNMFFYDLOH2XSSKEOLSMO6FDQ/action/citation_signature","submit_replication":"https://pith.science/pith/TZNMFFYDLOH2XSSKEOLSMO6FDQ/action/replication_record"}},"created_at":"2026-06-02T01:04:08.308482+00:00","updated_at":"2026-06-02T01:04:08.308482+00:00"}