{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:OZLX7HA3YXIBF5GH76HSDTRYMI","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"018b068a404b395d18a282dcfae45ad08bbeb8b5f35a74b5176d4b4f954a889b","cross_cats_sorted":["cs.AI"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DB","submitted_at":"2026-03-29T06:49:12Z","title_canon_sha256":"202db2d44f5a870a62c62d6b1f77396d4a4b6774a2324eb2d9b11247275e198d"},"schema_version":"1.0","source":{"id":"2604.16395","kind":"arxiv","version":3}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2604.16395","created_at":"2026-05-20T00:04:31Z"},{"alias_kind":"arxiv_version","alias_value":"2604.16395v3","created_at":"2026-05-20T00:04:31Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2604.16395","created_at":"2026-05-20T00:04:31Z"},{"alias_kind":"pith_short_12","alias_value":"OZLX7HA3YXIB","created_at":"2026-05-20T00:04:31Z"},{"alias_kind":"pith_short_16","alias_value":"OZLX7HA3YXIBF5GH","created_at":"2026-05-20T00:04:31Z"},{"alias_kind":"pith_short_8","alias_value":"OZLX7HA3","created_at":"2026-05-20T00:04:31Z"}],"graph_snapshots":[{"event_id":"sha256:988115e8e89cebc22fffbe7552f05c305f6e4b3e2bc3d92e501bffbd1870525d","target":"graph","created_at":"2026-05-20T00:04:31Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"Our evaluation demonstrates that streaming architecture delivers up to 11x TTFT improvements, with cost-aware scheduling providing critical benefits under memory pressure, all while maintaining throughput parity with non-streaming baselines."},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"The two collected real-world workloads (web crawling and approximate nearest neighbor search) are representative of production streaming patterns, and the adaptive preemption strategies incur acceptable overheads that do not erode the reported TTFT gains in practice."},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"Stream2LLM delivers up to 11x lower TTFT by streaming context retrieval, adaptive preemption for append and update modes, and longest-common-prefix reuse in disaggregated LLM deployments while preserving throughput."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"Stream2LLM overlaps incremental context retrieval with LLM prefill to cut time-to-first-token by up to 11x while matching non-streaming throughput."}],"snapshot_sha256":"fb3b4b28ddba83db3fa71031d271337a55a82857dd7db7d2c1f5bd0ba684e08a"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2604.16395/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Context retrieval systems for LLM inference face a critical challenge: high retrieval latency creates a fundamental tension between waiting for complete context (poor time-to-first-token) and proceeding without it (reduced quality). Streaming context incrementally--overlapping retrieval with inference--can mitigate this latency, but doing so with concurrent requests introduces new challenges: requests contend for GPU compute and memory, and scheduling must adapt to dynamic context arrivals.\n  We present Stream2LLM, a streaming-aware LLM serving system for concurrent prefill-decode disaggregate","authors_text":"Chengqi Luo, Divya Mahajan, Kexin Rong, Rajveer Bachkaniwala, Richard So","cross_cats":["cs.AI"],"headline":"Stream2LLM overlaps incremental context retrieval with LLM prefill to cut time-to-first-token by up to 11x while matching non-streaming throughput.","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DB","submitted_at":"2026-03-29T06:49:12Z","title":"Stream2LLM: Overlap Context Streaming and Prefill for Reduced Time-to-First-Token (TTFT)"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2604.16395","kind":"arxiv","version":3},"verdict":{"created_at":"2026-05-14T22:21:01.224046Z","id":"4c3501cd-7f88-405b-97af-fd55a9f69dd6","model_set":{"reader":"grok-4.3"},"one_line_summary":"Stream2LLM delivers up to 11x lower TTFT by streaming context retrieval, adaptive preemption for append and update modes, and longest-common-prefix reuse in disaggregated LLM deployments while preserving throughput.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"Stream2LLM overlaps incremental context retrieval with LLM prefill to cut time-to-first-token by up to 11x while matching non-streaming throughput.","strongest_claim":"Our evaluation demonstrates that streaming architecture delivers up to 11x TTFT improvements, with cost-aware scheduling providing critical benefits under memory pressure, all while maintaining throughput parity with non-streaming baselines.","weakest_assumption":"The two collected real-world workloads (web crawling and approximate nearest neighbor search) are representative of production streaming patterns, and the adaptive preemption strategies incur acceptable overheads that do not erode the reported TTFT gains in practice."}},"verdict_id":"4c3501cd-7f88-405b-97af-fd55a9f69dd6"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:e293658a3b3cfe88b7307a5472a2c0d58978a91a6e53a06323eff6a97faa1149","target":"record","created_at":"2026-05-20T00:04:31Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"018b068a404b395d18a282dcfae45ad08bbeb8b5f35a74b5176d4b4f954a889b","cross_cats_sorted":["cs.AI"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DB","submitted_at":"2026-03-29T06:49:12Z","title_canon_sha256":"202db2d44f5a870a62c62d6b1f77396d4a4b6774a2324eb2d9b11247275e198d"},"schema_version":"1.0","source":{"id":"2604.16395","kind":"arxiv","version":3}},"canonical_sha256":"76577f9c1bc5d012f4c7ff8f21ce38621db5bd06b819327452f90da7b015b9b3","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"76577f9c1bc5d012f4c7ff8f21ce38621db5bd06b819327452f90da7b015b9b3","first_computed_at":"2026-05-20T00:04:31.857493Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-20T00:04:31.857493Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"3BJHe9SrWRvklaNxNpGa6T2YWGaf3OCkb0qVQ/qT94Ob7rhsKNVNczmPuh1CLPLsuw44URf7gix8VsThRQH/BQ==","signature_status":"signed_v1","signed_at":"2026-05-20T00:04:31.858332Z","signed_message":"canonical_sha256_bytes"},"source_id":"2604.16395","source_kind":"arxiv","source_version":3}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:e293658a3b3cfe88b7307a5472a2c0d58978a91a6e53a06323eff6a97faa1149","sha256:988115e8e89cebc22fffbe7552f05c305f6e4b3e2bc3d92e501bffbd1870525d"],"state_sha256":"ef03445d7e2bd964c92e416df58939568800a1cf6f9fe69b7722c05633cf9f66"}