{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:VECEKTK3G6C6XTBLQI65MDKYRC","short_pith_number":"pith:VECEKTK3","canonical_record":{"source":{"id":"2605.23389","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.DC","submitted_at":"2026-05-22T09:00:45Z","cross_cats_sorted":[],"title_canon_sha256":"c56ca5fdd730b9db343eb4e44bcd7013cff3b23002b19127af0b17f70545d7c9","abstract_canon_sha256":"4b514338a4403c2171d5114a084d7cee05578bbea05559d10297961121540cd2"},"schema_version":"1.0"},"canonical_sha256":"a904454d5b3785ebcc2b823dd60d5888a1c165c805e44f161c6fd75dda29e68c","source":{"kind":"arxiv","id":"2605.23389","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.23389","created_at":"2026-05-25T02:01:52Z"},{"alias_kind":"arxiv_version","alias_value":"2605.23389v1","created_at":"2026-05-25T02:01:52Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.23389","created_at":"2026-05-25T02:01:52Z"},{"alias_kind":"pith_short_12","alias_value":"VECEKTK3G6C6","created_at":"2026-05-25T02:01:52Z"},{"alias_kind":"pith_short_16","alias_value":"VECEKTK3G6C6XTBL","created_at":"2026-05-25T02:01:52Z"},{"alias_kind":"pith_short_8","alias_value":"VECEKTK3","created_at":"2026-05-25T02:01:52Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:VECEKTK3G6C6XTBLQI65MDKYRC","target":"record","payload":{"canonical_record":{"source":{"id":"2605.23389","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.DC","submitted_at":"2026-05-22T09:00:45Z","cross_cats_sorted":[],"title_canon_sha256":"c56ca5fdd730b9db343eb4e44bcd7013cff3b23002b19127af0b17f70545d7c9","abstract_canon_sha256":"4b514338a4403c2171d5114a084d7cee05578bbea05559d10297961121540cd2"},"schema_version":"1.0"},"canonical_sha256":"a904454d5b3785ebcc2b823dd60d5888a1c165c805e44f161c6fd75dda29e68c","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-25T02:01:52.048509Z","signature_b64":"3whKugnuBeYHUyk3iwLV8w+RkDmcWsT3O3xnTRfCnmLBnbjOK7H4GXyEeEL1mnBBEz3CiAXoYIpHrC7Kxgg4AQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"a904454d5b3785ebcc2b823dd60d5888a1c165c805e44f161c6fd75dda29e68c","last_reissued_at":"2026-05-25T02:01:52.048017Z","signature_status":"signed_v1","first_computed_at":"2026-05-25T02:01:52.048017Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2605.23389","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-25T02:01:52Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"367L0/tDFtT74mZRY6qkUsqRkaMpV9xP3S5hOx5YUFDG/daFGas5FLcl5SOONtSz1eDpA1xI+dYRgNZu23YEAg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-25T23:12:05.893884Z"},"content_sha256":"0e42da60b2dec2dc7401dc9617a2977136c2bc797bbef045203280060930cb5d","schema_version":"1.0","event_id":"sha256:0e42da60b2dec2dc7401dc9617a2977136c2bc797bbef045203280060930cb5d"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:VECEKTK3G6C6XTBLQI65MDKYRC","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"AlignedServe: Orchestrating Prefix-aware Batching to Build a High-throughput and Computing-efficient LLM Serving System","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.DC","authors_text":"Fengyao Bai, Hongbin Zhang, Jiangsu Du, Yutong Lu, Zhiguang Chen, Zhitao Chen","submitted_at":"2026-05-22T09:00:45Z","abstract_excerpt":"High-throughput inference serving is essential for applications built on large language models (LLMs). Existing serving frameworks reduce request-level and batch-level bubbles through batching and scheduling, but often overlook bubbles within each decode iteration. Tokens generated in the same iteration may incur different costs because they depend on KV caches of different lengths; tokens with long KV caches can become bottlenecks and delay the next iteration. We propose AlignedServe, an LLM serving framework built around prefix-aware batching. It groups requests with similar KV-cache lengths"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.23389","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2605.23389/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-25T02:01:52Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"PWa3r8AtTv0n1TOurM7VsmVZzB25TzyhuKRy3XfXudfjyLcD7IzL1ayPCUUC+el8wyuXaZGjERahbQRUZIZWBQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-25T23:12:05.894555Z"},"content_sha256":"1338512412eee33cdf292222db8c50b88cba7f610371126c824e7c7d11e92517","schema_version":"1.0","event_id":"sha256:1338512412eee33cdf292222db8c50b88cba7f610371126c824e7c7d11e92517"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/VECEKTK3G6C6XTBLQI65MDKYRC/bundle.json","state_url":"https://pith.science/pith/VECEKTK3G6C6XTBLQI65MDKYRC/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/VECEKTK3G6C6XTBLQI65MDKYRC/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-25T23:12:05Z","links":{"resolver":"https://pith.science/pith/VECEKTK3G6C6XTBLQI65MDKYRC","bundle":"https://pith.science/pith/VECEKTK3G6C6XTBLQI65MDKYRC/bundle.json","state":"https://pith.science/pith/VECEKTK3G6C6XTBLQI65MDKYRC/state.json","well_known_bundle":"https://pith.science/.well-known/pith/VECEKTK3G6C6XTBLQI65MDKYRC/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:VECEKTK3G6C6XTBLQI65MDKYRC","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"4b514338a4403c2171d5114a084d7cee05578bbea05559d10297961121540cd2","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.DC","submitted_at":"2026-05-22T09:00:45Z","title_canon_sha256":"c56ca5fdd730b9db343eb4e44bcd7013cff3b23002b19127af0b17f70545d7c9"},"schema_version":"1.0","source":{"id":"2605.23389","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.23389","created_at":"2026-05-25T02:01:52Z"},{"alias_kind":"arxiv_version","alias_value":"2605.23389v1","created_at":"2026-05-25T02:01:52Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.23389","created_at":"2026-05-25T02:01:52Z"},{"alias_kind":"pith_short_12","alias_value":"VECEKTK3G6C6","created_at":"2026-05-25T02:01:52Z"},{"alias_kind":"pith_short_16","alias_value":"VECEKTK3G6C6XTBL","created_at":"2026-05-25T02:01:52Z"},{"alias_kind":"pith_short_8","alias_value":"VECEKTK3","created_at":"2026-05-25T02:01:52Z"}],"graph_snapshots":[{"event_id":"sha256:1338512412eee33cdf292222db8c50b88cba7f610371126c824e7c7d11e92517","target":"graph","created_at":"2026-05-25T02:01:52Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2605.23389/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"High-throughput inference serving is essential for applications built on large language models (LLMs). Existing serving frameworks reduce request-level and batch-level bubbles through batching and scheduling, but often overlook bubbles within each decode iteration. Tokens generated in the same iteration may incur different costs because they depend on KV caches of different lengths; tokens with long KV caches can become bottlenecks and delay the next iteration. We propose AlignedServe, an LLM serving framework built around prefix-aware batching. It groups requests with similar KV-cache lengths","authors_text":"Fengyao Bai, Hongbin Zhang, Jiangsu Du, Yutong Lu, Zhiguang Chen, Zhitao Chen","cross_cats":[],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.DC","submitted_at":"2026-05-22T09:00:45Z","title":"AlignedServe: Orchestrating Prefix-aware Batching to Build a High-throughput and Computing-efficient LLM Serving System"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.23389","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:0e42da60b2dec2dc7401dc9617a2977136c2bc797bbef045203280060930cb5d","target":"record","created_at":"2026-05-25T02:01:52Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"4b514338a4403c2171d5114a084d7cee05578bbea05559d10297961121540cd2","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.DC","submitted_at":"2026-05-22T09:00:45Z","title_canon_sha256":"c56ca5fdd730b9db343eb4e44bcd7013cff3b23002b19127af0b17f70545d7c9"},"schema_version":"1.0","source":{"id":"2605.23389","kind":"arxiv","version":1}},"canonical_sha256":"a904454d5b3785ebcc2b823dd60d5888a1c165c805e44f161c6fd75dda29e68c","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"a904454d5b3785ebcc2b823dd60d5888a1c165c805e44f161c6fd75dda29e68c","first_computed_at":"2026-05-25T02:01:52.048017Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-25T02:01:52.048017Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"3whKugnuBeYHUyk3iwLV8w+RkDmcWsT3O3xnTRfCnmLBnbjOK7H4GXyEeEL1mnBBEz3CiAXoYIpHrC7Kxgg4AQ==","signature_status":"signed_v1","signed_at":"2026-05-25T02:01:52.048509Z","signed_message":"canonical_sha256_bytes"},"source_id":"2605.23389","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:0e42da60b2dec2dc7401dc9617a2977136c2bc797bbef045203280060930cb5d","sha256:1338512412eee33cdf292222db8c50b88cba7f610371126c824e7c7d11e92517"],"state_sha256":"b00be2b7d283d76a61db68b79aa3a128bb9aed6825b374c09645e737f0840ce7"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"niRhHXwNhlZWK8xH91UB5y3ygK0BOjuavGG3fS8/nILW7WldXdtteb7qlm6C3e6es5aZXbbofCQSDsF4jYsFCQ==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-25T23:12:05.898331Z","bundle_sha256":"a98cb969aaa3ddb3d1fd6b200af9a17a00cd457bee6b46474407a3a78ec7663e"}}