{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2015:5DC4N5WS5I2ITZPUOL7XVW2452","short_pith_number":"pith:5DC4N5WS","schema_version":"1.0","canonical_sha256":"e8c5c6f6d2ea3489e5f472ff7adb5cee9ebd2228777dfc23703b55dfccd8b60a","source":{"kind":"arxiv","id":"1507.01239","version":3},"attestation_state":"computed","paper":{"title":"Experiments on Parallel Training of Deep Neural Network using Model Averaging","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.NE"],"primary_cat":"cs.LG","authors_text":"Hang Su, Haoyu Chen","submitted_at":"2015-07-05T16:29:33Z","abstract_excerpt":"In this work we apply model averaging to parallel training of deep neural network (DNN). Parallelization is done in a model averaging manner. Data is partitioned and distributed to different nodes for local model updates, and model averaging across nodes is done every few minibatches. We use multiple GPUs for data parallelization, and Message Passing Interface (MPI) for communication between nodes, which allows us to perform model averaging frequently without losing much time on communication. We investigate the effectiveness of Natural Gradient Stochastic Gradient Descent (NG-SGD) and Restric"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1507.01239","kind":"arxiv","version":3},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2015-07-05T16:29:33Z","cross_cats_sorted":["cs.NE"],"title_canon_sha256":"1834bfec154591708b069e9f654774e6d477884df43345b19d200282d763ae08","abstract_canon_sha256":"fd5610de6261a7046c3565ccb7838541f92617fc4ef8edc762dd32f02427c982"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:12:01.104089Z","signature_b64":"eYzRSiCA0H9ZOThspIS3TKZNccvA82vdjd6XOomOYq1ItR5BG2YJY+EOk1fHyjzvDLgm2fM41yYMAKPHcMDdDQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"e8c5c6f6d2ea3489e5f472ff7adb5cee9ebd2228777dfc23703b55dfccd8b60a","last_reissued_at":"2026-05-18T00:12:01.103446Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:12:01.103446Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Experiments on Parallel Training of Deep Neural Network using Model Averaging","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.NE"],"primary_cat":"cs.LG","authors_text":"Hang Su, Haoyu Chen","submitted_at":"2015-07-05T16:29:33Z","abstract_excerpt":"In this work we apply model averaging to parallel training of deep neural network (DNN). Parallelization is done in a model averaging manner. Data is partitioned and distributed to different nodes for local model updates, and model averaging across nodes is done every few minibatches. We use multiple GPUs for data parallelization, and Message Passing Interface (MPI) for communication between nodes, which allows us to perform model averaging frequently without losing much time on communication. We investigate the effectiveness of Natural Gradient Stochastic Gradient Descent (NG-SGD) and Restric"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1507.01239","kind":"arxiv","version":3},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1507.01239","created_at":"2026-05-18T00:12:01.103573+00:00"},{"alias_kind":"arxiv_version","alias_value":"1507.01239v3","created_at":"2026-05-18T00:12:01.103573+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1507.01239","created_at":"2026-05-18T00:12:01.103573+00:00"},{"alias_kind":"pith_short_12","alias_value":"5DC4N5WS5I2I","created_at":"2026-05-18T12:29:05.191682+00:00"},{"alias_kind":"pith_short_16","alias_value":"5DC4N5WS5I2ITZPU","created_at":"2026-05-18T12:29:05.191682+00:00"},{"alias_kind":"pith_short_8","alias_value":"5DC4N5WS","created_at":"2026-05-18T12:29:05.191682+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/5DC4N5WS5I2ITZPUOL7XVW2452","json":"https://pith.science/pith/5DC4N5WS5I2ITZPUOL7XVW2452.json","graph_json":"https://pith.science/api/pith-number/5DC4N5WS5I2ITZPUOL7XVW2452/graph.json","events_json":"https://pith.science/api/pith-number/5DC4N5WS5I2ITZPUOL7XVW2452/events.json","paper":"https://pith.science/paper/5DC4N5WS"},"agent_actions":{"view_html":"https://pith.science/pith/5DC4N5WS5I2ITZPUOL7XVW2452","download_json":"https://pith.science/pith/5DC4N5WS5I2ITZPUOL7XVW2452.json","view_paper":"https://pith.science/paper/5DC4N5WS","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1507.01239&json=true","fetch_graph":"https://pith.science/api/pith-number/5DC4N5WS5I2ITZPUOL7XVW2452/graph.json","fetch_events":"https://pith.science/api/pith-number/5DC4N5WS5I2ITZPUOL7XVW2452/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/5DC4N5WS5I2ITZPUOL7XVW2452/action/timestamp_anchor","attest_storage":"https://pith.science/pith/5DC4N5WS5I2ITZPUOL7XVW2452/action/storage_attestation","attest_author":"https://pith.science/pith/5DC4N5WS5I2ITZPUOL7XVW2452/action/author_attestation","sign_citation":"https://pith.science/pith/5DC4N5WS5I2ITZPUOL7XVW2452/action/citation_signature","submit_replication":"https://pith.science/pith/5DC4N5WS5I2ITZPUOL7XVW2452/action/replication_record"}},"created_at":"2026-05-18T00:12:01.103573+00:00","updated_at":"2026-05-18T00:12:01.103573+00:00"}