{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:UPZLLDUDH7UTJV2O7W7U74YNJH","short_pith_number":"pith:UPZLLDUD","schema_version":"1.0","canonical_sha256":"a3f2b58e833fe934d74efdbf4ff30d49d263130d3cfba572056945e38e66e9f5","source":{"kind":"arxiv","id":"2605.15617","version":1},"attestation_state":"computed","paper":{"title":"A Few GPUs, A Whole Lotta Scale: Faithful LLM Training Emulation with PrismLLM","license":"http://creativecommons.org/licenses/by/4.0/","headline":"PrismLLM emulates 8192-GPU LLM training using fewer than 1% of the GPUs with 0.58% average iteration time error.","cross_cats":["cs.AI"],"primary_cat":"cs.DC","authors_text":"Boyi Jia, Brian Sutioso, ChonLam Lao, Ennan Zhai, Erci Xu, Jiamin Cao, Jiaqi Gao, Jingren Zhou, Kui Ren, Minlan Yu, Shaoke Xi, Yong Li, Zhengping Qian, Zhipeng Zhang","submitted_at":"2026-05-15T04:58:20Z","abstract_excerpt":"Large language model (LLM) training today runs on clusters spanning thousands of GPUs. While this scale enables rapid model advances, developing, debugging, and performance-tuning the training framework inevitably becomes complex and costly. This is because engineers often need to reproduce production behaviors to diagnose failures or evaluate optimizations, thereby demanding frequent and even exclusive access to production-scale clusters -- which becomes increasingly hard given that the majority of GPUs are already committed to production workloads. Simulation relies on complex performance mo"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":true,"formal_links_present":true},"canonical_record":{"source":{"id":"2605.15617","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.DC","submitted_at":"2026-05-15T04:58:20Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"2e2370e667ba6379fb2fd1587171bbeb379a059696531bd859c20721b31ce0f4","abstract_canon_sha256":"8e81e68e1ce152e7814a552d4c12e61b0f44f5dee4dfa4372449a2193be8c481"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-20T00:01:08.427533Z","signature_b64":"GrdRTSfy+xYLadyOAqgM6m5X0gOCCoVUOvBHk+QSFZmwQF0mHqIxhHFzLPliUhRwJnvBRQtTBRfzeIFCvCryDQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"a3f2b58e833fe934d74efdbf4ff30d49d263130d3cfba572056945e38e66e9f5","last_reissued_at":"2026-05-20T00:01:08.426613Z","signature_status":"signed_v1","first_computed_at":"2026-05-20T00:01:08.426613Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"A Few GPUs, A Whole Lotta Scale: Faithful LLM Training Emulation with PrismLLM","license":"http://creativecommons.org/licenses/by/4.0/","headline":"PrismLLM emulates 8192-GPU LLM training using fewer than 1% of the GPUs with 0.58% average iteration time error.","cross_cats":["cs.AI"],"primary_cat":"cs.DC","authors_text":"Boyi Jia, Brian Sutioso, ChonLam Lao, Ennan Zhai, Erci Xu, Jiamin Cao, Jiaqi Gao, Jingren Zhou, Kui Ren, Minlan Yu, Shaoke Xi, Yong Li, Zhengping Qian, Zhipeng Zhang","submitted_at":"2026-05-15T04:58:20Z","abstract_excerpt":"Large language model (LLM) training today runs on clusters spanning thousands of GPUs. While this scale enables rapid model advances, developing, debugging, and performance-tuning the training framework inevitably becomes complex and costly. This is because engineers often need to reproduce production behaviors to diagnose failures or evaluate optimizations, thereby demanding frequent and even exclusive access to production-scale clusters -- which becomes increasingly hard given that the majority of GPUs are already committed to production workloads. Simulation relies on complex performance mo"},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"PrismLLM accurately reproduces performance and memory behavior, achieving only 0.58% average error in iteration time and less than 0.01% error in peak GPU memory usage. PrismLLM can emulate clusters of up to 8192 GPUs using fewer than 1% of the physical GPUs required by the original deployment.","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"The slicing-based construction of the high-fidelity execution graph fully captures computation, communication, and dependencies at the target scale such that hybrid emulation of selected ranks produces faithful large-scale behavior without missing scale-dependent effects.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"PrismLLM constructs a sliced execution graph and uses hybrid emulation to faithfully reproduce performance and memory behavior of up to 8192-GPU LLM training runs on fewer than 1% of the original GPUs.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"PrismLLM emulates 8192-GPU LLM training using fewer than 1% of the GPUs with 0.58% average iteration time error.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"b341b2c7494d056ad0dfd06916b0a8d1b1877676950e5d77aebcdcfe69d9464e"},"source":{"id":"2605.15617","kind":"arxiv","version":1},"verdict":{"id":"e519bb83-dc9d-440d-bebc-a099f26f92a8","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-19T19:52:01.110099Z","strongest_claim":"PrismLLM accurately reproduces performance and memory behavior, achieving only 0.58% average error in iteration time and less than 0.01% error in peak GPU memory usage. PrismLLM can emulate clusters of up to 8192 GPUs using fewer than 1% of the physical GPUs required by the original deployment.","one_line_summary":"PrismLLM constructs a sliced execution graph and uses hybrid emulation to faithfully reproduce performance and memory behavior of up to 8192-GPU LLM training runs on fewer than 1% of the original GPUs.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"The slicing-based construction of the high-fidelity execution graph fully captures computation, communication, and dependencies at the target scale such that hybrid emulation of selected ranks produces faithful large-scale behavior without missing scale-dependent effects.","pith_extraction_headline":"PrismLLM emulates 8192-GPU LLM training using fewer than 1% of the GPUs with 0.58% average iteration time error."},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2605.15617/integrity.json","findings":[],"available":true,"detectors_run":[{"name":"doi_compliance","ran_at":"2026-05-19T20:01:34.051019Z","status":"completed","version":"1.0.0","findings_count":0},{"name":"doi_title_agreement","ran_at":"2026-05-19T20:01:19.282897Z","status":"completed","version":"1.0.0","findings_count":0},{"name":"ai_meta_artifact","ran_at":"2026-05-19T19:34:34.616247Z","status":"skipped","version":"1.0.0","findings_count":0},{"name":"claim_evidence","ran_at":"2026-05-19T17:41:56.040465Z","status":"completed","version":"1.0.0","findings_count":0}],"snapshot_sha256":"24ace74fc83a080a69b3e3886c6e94f30bccf06f87b352e122a92dff55c4d09d"},"references":{"count":37,"sample":[{"doi":"10.1109/micro61859.2024.00021","year":2024,"title":"IEEE Computer Society, 338–351","work_id":"ec8e53fa-7797-499d-8abf-b570cbd7d680","ref_index":1,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2024,"title":"Flux: Fast software-based communication overlap on gpus through kernel fusion.arXiv preprint arXiv:2406.06858","work_id":"5d0e6adc-6dd2-49cc-8551-dc00433ed79f","ref_index":2,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2026,"title":"CRIU Project Developers. 2026. Github - CRIU: Checkpoint/Restore In Userspace.https://github.com/checkpoint-restore/criu. (2026)","work_id":"c851a98e-5754-4b98-bfc6-56c9b9a4e57a","ref_index":3,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2023,"title":"FlashAttention-2: Faster Attention with Better Parallelism and Work Partitioning","work_id":"fff3953b-5efb-4753-bee4-002f59995810","ref_index":4,"cited_arxiv_id":"2307.08691","is_internal_anchor":true},{"doi":"","year":null,"title":"Fu, Stefano Ermon, Atri Rudra, and Christopher Ré","work_id":"0e3c45ca-6ccf-4ec0-925b-aa9735a23f6c","ref_index":5,"cited_arxiv_id":"","is_internal_anchor":false}],"resolved_work":37,"snapshot_sha256":"f6488b7aa4348c289d66b4881376a603013cf6ae18ea40edca5039a04065d261","internal_anchors":7},"formal_canon":{"evidence_count":2,"snapshot_sha256":"05f51775ac7d178c0f8acd2f308630e10c03acf34cf1c7660d512f4d2216388b"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2605.15617","created_at":"2026-05-20T00:01:08.426754+00:00"},{"alias_kind":"arxiv_version","alias_value":"2605.15617v1","created_at":"2026-05-20T00:01:08.426754+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.15617","created_at":"2026-05-20T00:01:08.426754+00:00"},{"alias_kind":"pith_short_12","alias_value":"UPZLLDUDH7UT","created_at":"2026-05-20T00:01:08.426754+00:00"},{"alias_kind":"pith_short_16","alias_value":"UPZLLDUDH7UTJV2O","created_at":"2026-05-20T00:01:08.426754+00:00"},{"alias_kind":"pith_short_8","alias_value":"UPZLLDUD","created_at":"2026-05-20T00:01:08.426754+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":2,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/UPZLLDUDH7UTJV2O7W7U74YNJH","json":"https://pith.science/pith/UPZLLDUDH7UTJV2O7W7U74YNJH.json","graph_json":"https://pith.science/api/pith-number/UPZLLDUDH7UTJV2O7W7U74YNJH/graph.json","events_json":"https://pith.science/api/pith-number/UPZLLDUDH7UTJV2O7W7U74YNJH/events.json","paper":"https://pith.science/paper/UPZLLDUD"},"agent_actions":{"view_html":"https://pith.science/pith/UPZLLDUDH7UTJV2O7W7U74YNJH","download_json":"https://pith.science/pith/UPZLLDUDH7UTJV2O7W7U74YNJH.json","view_paper":"https://pith.science/paper/UPZLLDUD","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2605.15617&json=true","fetch_graph":"https://pith.science/api/pith-number/UPZLLDUDH7UTJV2O7W7U74YNJH/graph.json","fetch_events":"https://pith.science/api/pith-number/UPZLLDUDH7UTJV2O7W7U74YNJH/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/UPZLLDUDH7UTJV2O7W7U74YNJH/action/timestamp_anchor","attest_storage":"https://pith.science/pith/UPZLLDUDH7UTJV2O7W7U74YNJH/action/storage_attestation","attest_author":"https://pith.science/pith/UPZLLDUDH7UTJV2O7W7U74YNJH/action/author_attestation","sign_citation":"https://pith.science/pith/UPZLLDUDH7UTJV2O7W7U74YNJH/action/citation_signature","submit_replication":"https://pith.science/pith/UPZLLDUDH7UTJV2O7W7U74YNJH/action/replication_record"}},"created_at":"2026-05-20T00:01:08.426754+00:00","updated_at":"2026-05-20T00:01:08.426754+00:00"}