{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:ZSADPI3RHHXRHXUAVD5CR4UKHV","short_pith_number":"pith:ZSADPI3R","schema_version":"1.0","canonical_sha256":"cc8037a37139ef13de80a8fa28f28a3d495eca8b0c61d50e8851e9bb140aabd4","source":{"kind":"arxiv","id":"2605.19593","version":1},"attestation_state":"computed","paper":{"title":"Towards Multi-Model LLM Schedulers: Empirical Insights into Offloading and Preemption","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.DC"],"primary_cat":"cs.AI","authors_text":"Alexey Rolich, Andrea Baiocchi, Francesca Cuomo, Mert Yildiz, Pietro Spadaccino","submitted_at":"2026-05-19T09:39:16Z","abstract_excerpt":"Modern deployments of Large Language Models (LLMs) increasingly require serving multiple models with diverse architectures, sizes, and specialization on shared, heterogeneous hardware. This setting introduces new challenges for resource allocation, dispatching, and scheduling, particularly under GPU memory constraints where partial CPU-GPU offloading and preemption become necessary. While existing systems primarily optimize throughput for a single model, comparatively little work addresses multi-model scheduling under these conditions. In this paper, we present an empirical study of how differ"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2605.19593","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.AI","submitted_at":"2026-05-19T09:39:16Z","cross_cats_sorted":["cs.DC"],"title_canon_sha256":"a961b1622695450c28234129164829eab2a5f5fef279bd7e917d449940272f0b","abstract_canon_sha256":"a12929b3aac98eb87dd1cbfba4d28ed62f5b162edf65e50c55ad521ff1d76adc"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-20T01:05:53.385863Z","signature_b64":"rTvgvw5mcp1BW49n3h6VFn1ndit86hc9ls7LR7aN9Kwm9GZhNwigTnDsrSZvQq4j0YVZNG1Rh2c1MohcJNGKDA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"cc8037a37139ef13de80a8fa28f28a3d495eca8b0c61d50e8851e9bb140aabd4","last_reissued_at":"2026-05-20T01:05:53.385062Z","signature_status":"signed_v1","first_computed_at":"2026-05-20T01:05:53.385062Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Towards Multi-Model LLM Schedulers: Empirical Insights into Offloading and Preemption","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.DC"],"primary_cat":"cs.AI","authors_text":"Alexey Rolich, Andrea Baiocchi, Francesca Cuomo, Mert Yildiz, Pietro Spadaccino","submitted_at":"2026-05-19T09:39:16Z","abstract_excerpt":"Modern deployments of Large Language Models (LLMs) increasingly require serving multiple models with diverse architectures, sizes, and specialization on shared, heterogeneous hardware. This setting introduces new challenges for resource allocation, dispatching, and scheduling, particularly under GPU memory constraints where partial CPU-GPU offloading and preemption become necessary. While existing systems primarily optimize throughput for a single model, comparatively little work addresses multi-model scheduling under these conditions. In this paper, we present an empirical study of how differ"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.19593","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2605.19593/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2605.19593","created_at":"2026-05-20T01:05:53.385179+00:00"},{"alias_kind":"arxiv_version","alias_value":"2605.19593v1","created_at":"2026-05-20T01:05:53.385179+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.19593","created_at":"2026-05-20T01:05:53.385179+00:00"},{"alias_kind":"pith_short_12","alias_value":"ZSADPI3RHHXR","created_at":"2026-05-20T01:05:53.385179+00:00"},{"alias_kind":"pith_short_16","alias_value":"ZSADPI3RHHXRHXUA","created_at":"2026-05-20T01:05:53.385179+00:00"},{"alias_kind":"pith_short_8","alias_value":"ZSADPI3R","created_at":"2026-05-20T01:05:53.385179+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/ZSADPI3RHHXRHXUAVD5CR4UKHV","json":"https://pith.science/pith/ZSADPI3RHHXRHXUAVD5CR4UKHV.json","graph_json":"https://pith.science/api/pith-number/ZSADPI3RHHXRHXUAVD5CR4UKHV/graph.json","events_json":"https://pith.science/api/pith-number/ZSADPI3RHHXRHXUAVD5CR4UKHV/events.json","paper":"https://pith.science/paper/ZSADPI3R"},"agent_actions":{"view_html":"https://pith.science/pith/ZSADPI3RHHXRHXUAVD5CR4UKHV","download_json":"https://pith.science/pith/ZSADPI3RHHXRHXUAVD5CR4UKHV.json","view_paper":"https://pith.science/paper/ZSADPI3R","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2605.19593&json=true","fetch_graph":"https://pith.science/api/pith-number/ZSADPI3RHHXRHXUAVD5CR4UKHV/graph.json","fetch_events":"https://pith.science/api/pith-number/ZSADPI3RHHXRHXUAVD5CR4UKHV/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/ZSADPI3RHHXRHXUAVD5CR4UKHV/action/timestamp_anchor","attest_storage":"https://pith.science/pith/ZSADPI3RHHXRHXUAVD5CR4UKHV/action/storage_attestation","attest_author":"https://pith.science/pith/ZSADPI3RHHXRHXUAVD5CR4UKHV/action/author_attestation","sign_citation":"https://pith.science/pith/ZSADPI3RHHXRHXUAVD5CR4UKHV/action/citation_signature","submit_replication":"https://pith.science/pith/ZSADPI3RHHXRHXUAVD5CR4UKHV/action/replication_record"}},"created_at":"2026-05-20T01:05:53.385179+00:00","updated_at":"2026-05-20T01:05:53.385179+00:00"}