{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:WYTZ2ORNRDIDQU3DNRST7RAF64","short_pith_number":"pith:WYTZ2ORN","schema_version":"1.0","canonical_sha256":"b6279d3a2d88d03853636c653fc405f711fd424db2d5f82445108149794ff86b","source":{"kind":"arxiv","id":"2601.20273","version":2},"attestation_state":"computed","paper":{"title":"SwiftFusion: Scalable Sequence Parallelism for Distributed Inference of Diffusion Transformers on GPUs","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.CV"],"primary_cat":"cs.DC","authors_text":"Gennady Pekhimenko, Jiacheng Yang, Jun Wu, Yaoyao Ding, Yida Wang, Zhiying Xu","submitted_at":"2026-01-28T05:42:07Z","abstract_excerpt":"Diffusion Transformers (DiTs) have gained increasing adoption in high-quality image and video generation. As demand for higher-resolution images and longer videos increases, single-GPU inference becomes inefficient due to increased latency and large activation sizes. Current frameworks employ sequence parallelism (SP) techniques such as Ulysses Attention and Ring Attention to scale inference. However, these implementations have three primary limitations: (1) suboptimal communication patterns for network topologies on modern GPU machines, (2) latency bottlenecks from all-to-all operations in in"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2601.20273","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.DC","submitted_at":"2026-01-28T05:42:07Z","cross_cats_sorted":["cs.CV"],"title_canon_sha256":"088eace244b78c177291c6b0d4d3483c22732c0079928d843d7c0252d351b44c","abstract_canon_sha256":"d99a394eeab078ca9fe970fcdb66b863a8e65853b5a2441d472c4375cbce1d00"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-26T01:02:31.186129Z","signature_b64":"uVoEq0xYBBKdtMsHz2/T+fJZiEymeOG810pb8fom6toG5ZJ2O9LrQ/458S4yoStvmbkJx7Mx8YWtIPy83FpjBg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"b6279d3a2d88d03853636c653fc405f711fd424db2d5f82445108149794ff86b","last_reissued_at":"2026-05-26T01:02:31.185085Z","signature_status":"signed_v1","first_computed_at":"2026-05-26T01:02:31.185085Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"SwiftFusion: Scalable Sequence Parallelism for Distributed Inference of Diffusion Transformers on GPUs","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.CV"],"primary_cat":"cs.DC","authors_text":"Gennady Pekhimenko, Jiacheng Yang, Jun Wu, Yaoyao Ding, Yida Wang, Zhiying Xu","submitted_at":"2026-01-28T05:42:07Z","abstract_excerpt":"Diffusion Transformers (DiTs) have gained increasing adoption in high-quality image and video generation. As demand for higher-resolution images and longer videos increases, single-GPU inference becomes inefficient due to increased latency and large activation sizes. Current frameworks employ sequence parallelism (SP) techniques such as Ulysses Attention and Ring Attention to scale inference. However, these implementations have three primary limitations: (1) suboptimal communication patterns for network topologies on modern GPU machines, (2) latency bottlenecks from all-to-all operations in in"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2601.20273","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2601.20273/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2601.20273","created_at":"2026-05-26T01:02:31.185234+00:00"},{"alias_kind":"arxiv_version","alias_value":"2601.20273v2","created_at":"2026-05-26T01:02:31.185234+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2601.20273","created_at":"2026-05-26T01:02:31.185234+00:00"},{"alias_kind":"pith_short_12","alias_value":"WYTZ2ORNRDID","created_at":"2026-05-26T01:02:31.185234+00:00"},{"alias_kind":"pith_short_16","alias_value":"WYTZ2ORNRDIDQU3D","created_at":"2026-05-26T01:02:31.185234+00:00"},{"alias_kind":"pith_short_8","alias_value":"WYTZ2ORN","created_at":"2026-05-26T01:02:31.185234+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":3,"internal_anchor_count":3,"sample":[{"citing_arxiv_id":"2605.18739","citing_title":"LongLive-2.0: An NVFP4 Parallel Infrastructure for Long Video Generation","ref_index":64,"is_internal_anchor":true},{"citing_arxiv_id":"2604.04335","citing_title":"GENSERVE: Efficient Co-Serving of Heterogeneous Diffusion Model Workloads","ref_index":41,"is_internal_anchor":true},{"citing_arxiv_id":"2604.14561","citing_title":"CoCoDiff: Optimizing Collective Communications for Distributed Diffusion Transformer Inference Under Ulysses Sequence Parallelism","ref_index":47,"is_internal_anchor":true}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/WYTZ2ORNRDIDQU3DNRST7RAF64","json":"https://pith.science/pith/WYTZ2ORNRDIDQU3DNRST7RAF64.json","graph_json":"https://pith.science/api/pith-number/WYTZ2ORNRDIDQU3DNRST7RAF64/graph.json","events_json":"https://pith.science/api/pith-number/WYTZ2ORNRDIDQU3DNRST7RAF64/events.json","paper":"https://pith.science/paper/WYTZ2ORN"},"agent_actions":{"view_html":"https://pith.science/pith/WYTZ2ORNRDIDQU3DNRST7RAF64","download_json":"https://pith.science/pith/WYTZ2ORNRDIDQU3DNRST7RAF64.json","view_paper":"https://pith.science/paper/WYTZ2ORN","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2601.20273&json=true","fetch_graph":"https://pith.science/api/pith-number/WYTZ2ORNRDIDQU3DNRST7RAF64/graph.json","fetch_events":"https://pith.science/api/pith-number/WYTZ2ORNRDIDQU3DNRST7RAF64/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/WYTZ2ORNRDIDQU3DNRST7RAF64/action/timestamp_anchor","attest_storage":"https://pith.science/pith/WYTZ2ORNRDIDQU3DNRST7RAF64/action/storage_attestation","attest_author":"https://pith.science/pith/WYTZ2ORNRDIDQU3DNRST7RAF64/action/author_attestation","sign_citation":"https://pith.science/pith/WYTZ2ORNRDIDQU3DNRST7RAF64/action/citation_signature","submit_replication":"https://pith.science/pith/WYTZ2ORNRDIDQU3DNRST7RAF64/action/replication_record"}},"created_at":"2026-05-26T01:02:31.185234+00:00","updated_at":"2026-05-26T01:02:31.185234+00:00"}