{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2017:IGHNRFWMMSPLCREHMVE5M6B43L","short_pith_number":"pith:IGHNRFWM","schema_version":"1.0","canonical_sha256":"418ed896cc649eb144876549d6783cdacd0b5b592ce7b43c3e61374a87bacf64","source":{"kind":"arxiv","id":"1707.09414","version":1},"attestation_state":"computed","paper":{"title":"Optimized Broadcast for Deep Learning Workloads on Dense-GPU InfiniBand Clusters: MPI or NCCL?","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.DC","authors_text":"Ammar Ahmad Awan, Ching-Hsiang Chu, Dhabaleswar K. Panda, Hari Subramoni","submitted_at":"2017-07-28T20:54:06Z","abstract_excerpt":"Dense Multi-GPU systems have recently gained a lot of attention in the HPC arena. Traditionally, MPI runtimes have been primarily designed for clusters with a large number of nodes. However, with the advent of MPI+CUDA applications and CUDA-Aware MPI runtimes like MVAPICH2 and OpenMPI, it has become important to address efficient communication schemes for such dense Multi-GPU nodes. This coupled with new application workloads brought forward by Deep Learning frameworks like Caffe and Microsoft CNTK pose additional design constraints due to very large message communication of GPU buffers during"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1707.09414","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DC","submitted_at":"2017-07-28T20:54:06Z","cross_cats_sorted":[],"title_canon_sha256":"f231bfe7bfabfdd5bfe021802d29e1cccd02428e9b64f37d04666ee45b4e9419","abstract_canon_sha256":"a1d8fdc7501754b93bf2d6d505c9a7affaab362ffbf78aa4a84c744df827d46b"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:39:11.363441Z","signature_b64":"TM6h2lAtPNy2aC7J116K/SXfRnH3yjgXcBLtSTx1PJuQpT9P/TFy1RiTvoxOy3xpp/BJBUnxEBBqCF/xnTfSCQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"418ed896cc649eb144876549d6783cdacd0b5b592ce7b43c3e61374a87bacf64","last_reissued_at":"2026-05-18T00:39:11.362840Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:39:11.362840Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Optimized Broadcast for Deep Learning Workloads on Dense-GPU InfiniBand Clusters: MPI or NCCL?","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.DC","authors_text":"Ammar Ahmad Awan, Ching-Hsiang Chu, Dhabaleswar K. Panda, Hari Subramoni","submitted_at":"2017-07-28T20:54:06Z","abstract_excerpt":"Dense Multi-GPU systems have recently gained a lot of attention in the HPC arena. Traditionally, MPI runtimes have been primarily designed for clusters with a large number of nodes. However, with the advent of MPI+CUDA applications and CUDA-Aware MPI runtimes like MVAPICH2 and OpenMPI, it has become important to address efficient communication schemes for such dense Multi-GPU nodes. This coupled with new application workloads brought forward by Deep Learning frameworks like Caffe and Microsoft CNTK pose additional design constraints due to very large message communication of GPU buffers during"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1707.09414","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1707.09414","created_at":"2026-05-18T00:39:11.362941+00:00"},{"alias_kind":"arxiv_version","alias_value":"1707.09414v1","created_at":"2026-05-18T00:39:11.362941+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1707.09414","created_at":"2026-05-18T00:39:11.362941+00:00"},{"alias_kind":"pith_short_12","alias_value":"IGHNRFWMMSPL","created_at":"2026-05-18T12:31:21.493067+00:00"},{"alias_kind":"pith_short_16","alias_value":"IGHNRFWMMSPLCREH","created_at":"2026-05-18T12:31:21.493067+00:00"},{"alias_kind":"pith_short_8","alias_value":"IGHNRFWM","created_at":"2026-05-18T12:31:21.493067+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/IGHNRFWMMSPLCREHMVE5M6B43L","json":"https://pith.science/pith/IGHNRFWMMSPLCREHMVE5M6B43L.json","graph_json":"https://pith.science/api/pith-number/IGHNRFWMMSPLCREHMVE5M6B43L/graph.json","events_json":"https://pith.science/api/pith-number/IGHNRFWMMSPLCREHMVE5M6B43L/events.json","paper":"https://pith.science/paper/IGHNRFWM"},"agent_actions":{"view_html":"https://pith.science/pith/IGHNRFWMMSPLCREHMVE5M6B43L","download_json":"https://pith.science/pith/IGHNRFWMMSPLCREHMVE5M6B43L.json","view_paper":"https://pith.science/paper/IGHNRFWM","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1707.09414&json=true","fetch_graph":"https://pith.science/api/pith-number/IGHNRFWMMSPLCREHMVE5M6B43L/graph.json","fetch_events":"https://pith.science/api/pith-number/IGHNRFWMMSPLCREHMVE5M6B43L/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/IGHNRFWMMSPLCREHMVE5M6B43L/action/timestamp_anchor","attest_storage":"https://pith.science/pith/IGHNRFWMMSPLCREHMVE5M6B43L/action/storage_attestation","attest_author":"https://pith.science/pith/IGHNRFWMMSPLCREHMVE5M6B43L/action/author_attestation","sign_citation":"https://pith.science/pith/IGHNRFWMMSPLCREHMVE5M6B43L/action/citation_signature","submit_replication":"https://pith.science/pith/IGHNRFWMMSPLCREHMVE5M6B43L/action/replication_record"}},"created_at":"2026-05-18T00:39:11.362941+00:00","updated_at":"2026-05-18T00:39:11.362941+00:00"}