{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2022:QVNGSZMFIB3554PNSTLCH54S2Z","short_pith_number":"pith:QVNGSZMF","schema_version":"1.0","canonical_sha256":"855a6965854077def1ed94d623f792d67bce956b17a61d5bde8deb005bfd8fe6","source":{"kind":"arxiv","id":"2202.07848","version":2},"attestation_state":"computed","paper":{"title":"Singularity: Planet-Scale, Preemptive and Elastic Scheduling of AI Workloads","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.DC","authors_text":"Abhishek Singh, Amey Agrawal, Atul Katiyar, Bhargav Gulavani, Chen Chen, Cheng Xu, Dharma Shukla, Eddie Ailijiang, Hasibur Rahman, Karthik Elangovan, Kaustubh Welankar, Lu Xun, Mark Russinovich (Microsoft), Muthian Sivathanu, Nipun Kwatra, Pankaj Sharma, Rahul Seetharaman, Ramachandran Ramjee, Ravi Anupindi, Rimma Nehme, Shreshth Singhal, Srinidhi Viswanatha, Suresh Krishnappa, Vaibhav Sharma, Vipul Modi, Zhou Lin","submitted_at":"2022-02-16T04:02:10Z","abstract_excerpt":"Lowering costs by driving high utilization across deep learning workloads is a crucial lever for cloud providers. We present Singularity, Microsoft's globally distributed scheduling service for highly-efficient and reliable execution of deep learning training and inference workloads. At the heart of Singularity is a novel, workload-aware scheduler that can transparently preempt and elastically scale deep learning workloads to drive high utilization without impacting their correctness or performance, across a global fleet of AI accelerators (e.g., GPUs, FPGAs).\n  All jobs in Singularity are pre"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2202.07848","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DC","submitted_at":"2022-02-16T04:02:10Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"6b8ea73f047cf8f265018c0aa55ee5bcddd1be090f35792c0f741ac9f248e8bc","abstract_canon_sha256":"f042e8d852a7e514246fbe9be94f9cc54ae7a3c5ad19d260b0f4b9f0d0ed5d7a"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-07-05T03:58:36.955695Z","signature_b64":"MLM24hC62GrTb71twmBOlkGjE0nD3G3AAxj6LDamUtzNdjvppHOePGnPqxtfrxIrNdlr+rbUB1MQbiIO8YoWDQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"855a6965854077def1ed94d623f792d67bce956b17a61d5bde8deb005bfd8fe6","last_reissued_at":"2026-07-05T03:58:36.955199Z","signature_status":"signed_v1","first_computed_at":"2026-07-05T03:58:36.955199Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Singularity: Planet-Scale, Preemptive and Elastic Scheduling of AI Workloads","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.DC","authors_text":"Abhishek Singh, Amey Agrawal, Atul Katiyar, Bhargav Gulavani, Chen Chen, Cheng Xu, Dharma Shukla, Eddie Ailijiang, Hasibur Rahman, Karthik Elangovan, Kaustubh Welankar, Lu Xun, Mark Russinovich (Microsoft), Muthian Sivathanu, Nipun Kwatra, Pankaj Sharma, Rahul Seetharaman, Ramachandran Ramjee, Ravi Anupindi, Rimma Nehme, Shreshth Singhal, Srinidhi Viswanatha, Suresh Krishnappa, Vaibhav Sharma, Vipul Modi, Zhou Lin","submitted_at":"2022-02-16T04:02:10Z","abstract_excerpt":"Lowering costs by driving high utilization across deep learning workloads is a crucial lever for cloud providers. We present Singularity, Microsoft's globally distributed scheduling service for highly-efficient and reliable execution of deep learning training and inference workloads. At the heart of Singularity is a novel, workload-aware scheduler that can transparently preempt and elastically scale deep learning workloads to drive high utilization without impacting their correctness or performance, across a global fleet of AI accelerators (e.g., GPUs, FPGAs).\n  All jobs in Singularity are pre"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2202.07848","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2202.07848/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2202.07848","created_at":"2026-07-05T03:58:36.955257+00:00"},{"alias_kind":"arxiv_version","alias_value":"2202.07848v2","created_at":"2026-07-05T03:58:36.955257+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2202.07848","created_at":"2026-07-05T03:58:36.955257+00:00"},{"alias_kind":"pith_short_12","alias_value":"QVNGSZMFIB35","created_at":"2026-07-05T03:58:36.955257+00:00"},{"alias_kind":"pith_short_16","alias_value":"QVNGSZMFIB3554PN","created_at":"2026-07-05T03:58:36.955257+00:00"},{"alias_kind":"pith_short_8","alias_value":"QVNGSZMF","created_at":"2026-07-05T03:58:36.955257+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":2,"internal_anchor_count":0,"sample":[{"citing_arxiv_id":"2606.25098","citing_title":"Power-Flexible AI Data Centers: A New Paradigm for Grid-Responsive Compute","ref_index":9,"is_internal_anchor":false},{"citing_arxiv_id":"2606.04415","citing_title":"FlexNPU: Transparent NPU Virtualization for Dynamic LLM Prefill-Decode Co-location","ref_index":5,"is_internal_anchor":false}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/QVNGSZMFIB3554PNSTLCH54S2Z","json":"https://pith.science/pith/QVNGSZMFIB3554PNSTLCH54S2Z.json","graph_json":"https://pith.science/api/pith-number/QVNGSZMFIB3554PNSTLCH54S2Z/graph.json","events_json":"https://pith.science/api/pith-number/QVNGSZMFIB3554PNSTLCH54S2Z/events.json","paper":"https://pith.science/paper/QVNGSZMF"},"agent_actions":{"view_html":"https://pith.science/pith/QVNGSZMFIB3554PNSTLCH54S2Z","download_json":"https://pith.science/pith/QVNGSZMFIB3554PNSTLCH54S2Z.json","view_paper":"https://pith.science/paper/QVNGSZMF","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2202.07848&json=true","fetch_graph":"https://pith.science/api/pith-number/QVNGSZMFIB3554PNSTLCH54S2Z/graph.json","fetch_events":"https://pith.science/api/pith-number/QVNGSZMFIB3554PNSTLCH54S2Z/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/QVNGSZMFIB3554PNSTLCH54S2Z/action/timestamp_anchor","attest_storage":"https://pith.science/pith/QVNGSZMFIB3554PNSTLCH54S2Z/action/storage_attestation","attest_author":"https://pith.science/pith/QVNGSZMFIB3554PNSTLCH54S2Z/action/author_attestation","sign_citation":"https://pith.science/pith/QVNGSZMFIB3554PNSTLCH54S2Z/action/citation_signature","submit_replication":"https://pith.science/pith/QVNGSZMFIB3554PNSTLCH54S2Z/action/replication_record"}},"created_at":"2026-07-05T03:58:36.955257+00:00","updated_at":"2026-07-05T03:58:36.955257+00:00"}