{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2018:ILK7VY6J7ZIUPP4333ZMLUDFCV","short_pith_number":"pith:ILK7VY6J","canonical_record":{"source":{"id":"1806.00582","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-06-02T04:45:58Z","cross_cats_sorted":["stat.ML"],"title_canon_sha256":"60793869653bc39cf520f01906c6876ef56ef521c0e3466b865e029107c2ebac","abstract_canon_sha256":"9966f33a42ee725d7f8da443d95995915eee321865583111c457d6b57714106f"},"schema_version":"1.0"},"canonical_sha256":"42d5fae3c9fe5147bf9bdef2c5d065154555329e5ab2e379c17ab4bf747f2eb6","source":{"kind":"arxiv","id":"1806.00582","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1806.00582","created_at":"2026-05-17T23:38:48Z"},{"alias_kind":"arxiv_version","alias_value":"1806.00582v2","created_at":"2026-05-17T23:38:48Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1806.00582","created_at":"2026-05-17T23:38:48Z"},{"alias_kind":"pith_short_12","alias_value":"ILK7VY6J7ZIU","created_at":"2026-05-18T12:32:31Z"},{"alias_kind":"pith_short_16","alias_value":"ILK7VY6J7ZIUPP43","created_at":"2026-05-18T12:32:31Z"},{"alias_kind":"pith_short_8","alias_value":"ILK7VY6J","created_at":"2026-05-18T12:32:31Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2018:ILK7VY6J7ZIUPP4333ZMLUDFCV","target":"record","payload":{"canonical_record":{"source":{"id":"1806.00582","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-06-02T04:45:58Z","cross_cats_sorted":["stat.ML"],"title_canon_sha256":"60793869653bc39cf520f01906c6876ef56ef521c0e3466b865e029107c2ebac","abstract_canon_sha256":"9966f33a42ee725d7f8da443d95995915eee321865583111c457d6b57714106f"},"schema_version":"1.0"},"canonical_sha256":"42d5fae3c9fe5147bf9bdef2c5d065154555329e5ab2e379c17ab4bf747f2eb6","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:38:48.205124Z","signature_b64":"fp2aEv52+SW+EiIbFWl/RI8eTQqVDPrV6uIfN88I/kdMx9E802p1fHi7XpeUE6rp59ACWFd8YZZCU22GRQOjAg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"42d5fae3c9fe5147bf9bdef2c5d065154555329e5ab2e379c17ab4bf747f2eb6","last_reissued_at":"2026-05-17T23:38:48.204579Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:38:48.204579Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1806.00582","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:38:48Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"f/R/BvSByWZZtf4yYeLGsXbK4XBghlw1P9uFgM4biTTXUygpoLYIn642kpINJ/E4+fTZDMfHJ8gGRjqxAZ8xDA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-01T13:42:40.020359Z"},"content_sha256":"32a7924c04df06dff811558affc79f44f1a972263b7aa6e07c424afe47c499dd","schema_version":"1.0","event_id":"sha256:32a7924c04df06dff811558affc79f44f1a972263b7aa6e07c424afe47c499dd"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2018:ILK7VY6J7ZIUPP4333ZMLUDFCV","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Federated Learning with Non-IID Data","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"A small globally shared data subset recovers up to 30% accuracy lost to non-IID data in federated learning.","cross_cats":["stat.ML"],"primary_cat":"cs.LG","authors_text":"Damon Civin, Liangzhen Lai, Meng Li, Naveen Suda, Vikas Chandra, Yue Zhao","submitted_at":"2018-06-02T04:45:58Z","abstract_excerpt":"Federated learning enables resource-constrained edge compute devices, such as mobile phones and IoT devices, to learn a shared model for prediction, while keeping the training data local. This decentralized approach to train models provides privacy, security, regulatory and economic benefits. In this work, we focus on the statistical challenge of federated learning when local data is non-IID. We first show that the accuracy of federated learning reduces significantly, by up to 55% for neural networks trained for highly skewed non-IID data, where each client device trains only on a single class"},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"Experiments show that accuracy can be increased by 30% for the CIFAR-10 dataset with only 5% globally shared data.","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"That a small globally shared data subset can be created and distributed without violating the privacy or regulatory constraints that motivated federated learning in the first place.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"Non-IID data causes up to 55% accuracy loss in federated learning due to weight divergence measured by earth mover's distance; 5% globally shared data recovers 30% accuracy on CIFAR-10.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"A small globally shared data subset recovers up to 30% accuracy lost to non-IID data in federated learning.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"4d192665be16f3de12614d01db1dd4cd4b577b3617b9a0fb002004845867b41a"},"source":{"id":"1806.00582","kind":"arxiv","version":2},"verdict":{"id":"dd3194bb-6fde-4875-8df5-9dfc510b6c46","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-16T10:18:46.227951Z","strongest_claim":"Experiments show that accuracy can be increased by 30% for the CIFAR-10 dataset with only 5% globally shared data.","one_line_summary":"Non-IID data causes up to 55% accuracy loss in federated learning due to weight divergence measured by earth mover's distance; 5% globally shared data recovers 30% accuracy on CIFAR-10.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"That a small globally shared data subset can be created and distributed without violating the privacy or regulatory constraints that motivated federated learning in the first place.","pith_extraction_headline":"A small globally shared data subset recovers up to 30% accuracy lost to non-IID data in federated learning."},"references":{"count":23,"sample":[{"doi":"","year":2017,"title":"Hello edge: Keyword spotting on microcontrollers","work_id":"1ea17269-91bf-43a4-af26-7fe2e77c41b8","ref_index":1,"cited_arxiv_id":"1711.07128","is_internal_anchor":true},{"doi":"","year":2018,"title":"CMSIS-NN: Efficient neural network kernels for ARM Cortex-M CPUs","work_id":"69ae0df4-8381-4f4e-b7e4-79c11ea7c479","ref_index":2,"cited_arxiv_id":"1801.06601","is_internal_anchor":true},{"doi":"","year":2017,"title":"Communication-efﬁcient learning of deep networks from decentralized data,","work_id":"bb22376b-52c1-4ec6-9926-1ccb63000b6c","ref_index":3,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2015,"title":"Federated optimization: Dis- tributed optimization beyond the datacenter.arXiv preprint arXiv:1511.03575","work_id":"73185f03-b92c-4266-9a44-e41bc13da684","ref_index":4,"cited_arxiv_id":"1511.03575","is_internal_anchor":true},{"doi":"","year":2017,"title":"Federated learning: Collaborative machine learning without centralized training data,","work_id":"61b390f8-4278-4f67-848f-3969353fdb87","ref_index":5,"cited_arxiv_id":"","is_internal_anchor":false}],"resolved_work":23,"snapshot_sha256":"0a271ef58e65e7f6dad1cb2226b9c8c68b8be137b7b2ab5dbc6986a54563db78","internal_anchors":7},"formal_canon":{"evidence_count":2,"snapshot_sha256":"79f3f6428a3592f16c2cf33dccfcc98a747186dc9e575caa6ece74d996476b7e"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":"dd3194bb-6fde-4875-8df5-9dfc510b6c46"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:38:48Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"AwZIpuLKlwCTBKHXA6q7N+hMf9RJVW36wWGeRYzfettRm6tA9sHBHbXAZZRTzcp1dzqTcy9N5nHbrrUXh3zlDA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-01T13:42:40.020968Z"},"content_sha256":"6be5067409130fa0e838c942998d8790e70fd72788bfdbdfe84ca8cd77786997","schema_version":"1.0","event_id":"sha256:6be5067409130fa0e838c942998d8790e70fd72788bfdbdfe84ca8cd77786997"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/ILK7VY6J7ZIUPP4333ZMLUDFCV/bundle.json","state_url":"https://pith.science/pith/ILK7VY6J7ZIUPP4333ZMLUDFCV/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/ILK7VY6J7ZIUPP4333ZMLUDFCV/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-01T13:42:40Z","links":{"resolver":"https://pith.science/pith/ILK7VY6J7ZIUPP4333ZMLUDFCV","bundle":"https://pith.science/pith/ILK7VY6J7ZIUPP4333ZMLUDFCV/bundle.json","state":"https://pith.science/pith/ILK7VY6J7ZIUPP4333ZMLUDFCV/state.json","well_known_bundle":"https://pith.science/.well-known/pith/ILK7VY6J7ZIUPP4333ZMLUDFCV/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2018:ILK7VY6J7ZIUPP4333ZMLUDFCV","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"9966f33a42ee725d7f8da443d95995915eee321865583111c457d6b57714106f","cross_cats_sorted":["stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-06-02T04:45:58Z","title_canon_sha256":"60793869653bc39cf520f01906c6876ef56ef521c0e3466b865e029107c2ebac"},"schema_version":"1.0","source":{"id":"1806.00582","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1806.00582","created_at":"2026-05-17T23:38:48Z"},{"alias_kind":"arxiv_version","alias_value":"1806.00582v2","created_at":"2026-05-17T23:38:48Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1806.00582","created_at":"2026-05-17T23:38:48Z"},{"alias_kind":"pith_short_12","alias_value":"ILK7VY6J7ZIU","created_at":"2026-05-18T12:32:31Z"},{"alias_kind":"pith_short_16","alias_value":"ILK7VY6J7ZIUPP43","created_at":"2026-05-18T12:32:31Z"},{"alias_kind":"pith_short_8","alias_value":"ILK7VY6J","created_at":"2026-05-18T12:32:31Z"}],"graph_snapshots":[{"event_id":"sha256:6be5067409130fa0e838c942998d8790e70fd72788bfdbdfe84ca8cd77786997","target":"graph","created_at":"2026-05-17T23:38:48Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"Experiments show that accuracy can be increased by 30% for the CIFAR-10 dataset with only 5% globally shared data."},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"That a small globally shared data subset can be created and distributed without violating the privacy or regulatory constraints that motivated federated learning in the first place."},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"Non-IID data causes up to 55% accuracy loss in federated learning due to weight divergence measured by earth mover's distance; 5% globally shared data recovers 30% accuracy on CIFAR-10."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"A small globally shared data subset recovers up to 30% accuracy lost to non-IID data in federated learning."}],"snapshot_sha256":"4d192665be16f3de12614d01db1dd4cd4b577b3617b9a0fb002004845867b41a"},"formal_canon":{"evidence_count":2,"snapshot_sha256":"79f3f6428a3592f16c2cf33dccfcc98a747186dc9e575caa6ece74d996476b7e"},"paper":{"abstract_excerpt":"Federated learning enables resource-constrained edge compute devices, such as mobile phones and IoT devices, to learn a shared model for prediction, while keeping the training data local. This decentralized approach to train models provides privacy, security, regulatory and economic benefits. In this work, we focus on the statistical challenge of federated learning when local data is non-IID. We first show that the accuracy of federated learning reduces significantly, by up to 55% for neural networks trained for highly skewed non-IID data, where each client device trains only on a single class","authors_text":"Damon Civin, Liangzhen Lai, Meng Li, Naveen Suda, Vikas Chandra, Yue Zhao","cross_cats":["stat.ML"],"headline":"A small globally shared data subset recovers up to 30% accuracy lost to non-IID data in federated learning.","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-06-02T04:45:58Z","title":"Federated Learning with Non-IID Data"},"references":{"count":23,"internal_anchors":7,"resolved_work":23,"sample":[{"cited_arxiv_id":"1711.07128","doi":"","is_internal_anchor":true,"ref_index":1,"title":"Hello edge: Keyword spotting on microcontrollers","work_id":"1ea17269-91bf-43a4-af26-7fe2e77c41b8","year":2017},{"cited_arxiv_id":"1801.06601","doi":"","is_internal_anchor":true,"ref_index":2,"title":"CMSIS-NN: Efficient neural network kernels for ARM Cortex-M CPUs","work_id":"69ae0df4-8381-4f4e-b7e4-79c11ea7c479","year":2018},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":3,"title":"Communication-efﬁcient learning of deep networks from decentralized data,","work_id":"bb22376b-52c1-4ec6-9926-1ccb63000b6c","year":2017},{"cited_arxiv_id":"1511.03575","doi":"","is_internal_anchor":true,"ref_index":4,"title":"Federated optimization: Dis- tributed optimization beyond the datacenter.arXiv preprint arXiv:1511.03575","work_id":"73185f03-b92c-4266-9a44-e41bc13da684","year":2015},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":5,"title":"Federated learning: Collaborative machine learning without centralized training data,","work_id":"61b390f8-4278-4f67-848f-3969353fdb87","year":2017}],"snapshot_sha256":"0a271ef58e65e7f6dad1cb2226b9c8c68b8be137b7b2ab5dbc6986a54563db78"},"source":{"id":"1806.00582","kind":"arxiv","version":2},"verdict":{"created_at":"2026-05-16T10:18:46.227951Z","id":"dd3194bb-6fde-4875-8df5-9dfc510b6c46","model_set":{"reader":"grok-4.3"},"one_line_summary":"Non-IID data causes up to 55% accuracy loss in federated learning due to weight divergence measured by earth mover's distance; 5% globally shared data recovers 30% accuracy on CIFAR-10.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"A small globally shared data subset recovers up to 30% accuracy lost to non-IID data in federated learning.","strongest_claim":"Experiments show that accuracy can be increased by 30% for the CIFAR-10 dataset with only 5% globally shared data.","weakest_assumption":"That a small globally shared data subset can be created and distributed without violating the privacy or regulatory constraints that motivated federated learning in the first place."}},"verdict_id":"dd3194bb-6fde-4875-8df5-9dfc510b6c46"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:32a7924c04df06dff811558affc79f44f1a972263b7aa6e07c424afe47c499dd","target":"record","created_at":"2026-05-17T23:38:48Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"9966f33a42ee725d7f8da443d95995915eee321865583111c457d6b57714106f","cross_cats_sorted":["stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-06-02T04:45:58Z","title_canon_sha256":"60793869653bc39cf520f01906c6876ef56ef521c0e3466b865e029107c2ebac"},"schema_version":"1.0","source":{"id":"1806.00582","kind":"arxiv","version":2}},"canonical_sha256":"42d5fae3c9fe5147bf9bdef2c5d065154555329e5ab2e379c17ab4bf747f2eb6","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"42d5fae3c9fe5147bf9bdef2c5d065154555329e5ab2e379c17ab4bf747f2eb6","first_computed_at":"2026-05-17T23:38:48.204579Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:38:48.204579Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"fp2aEv52+SW+EiIbFWl/RI8eTQqVDPrV6uIfN88I/kdMx9E802p1fHi7XpeUE6rp59ACWFd8YZZCU22GRQOjAg==","signature_status":"signed_v1","signed_at":"2026-05-17T23:38:48.205124Z","signed_message":"canonical_sha256_bytes"},"source_id":"1806.00582","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:32a7924c04df06dff811558affc79f44f1a972263b7aa6e07c424afe47c499dd","sha256:6be5067409130fa0e838c942998d8790e70fd72788bfdbdfe84ca8cd77786997"],"state_sha256":"928cc1aebdfc73a1865db2677cfb646d877d2f6cef4185f98b01c3204c52e0ff"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"g9OwS11TFd4se3zfVq1Q02f5oI6SlsuED1kjD41tZj26T9M7wRtoVFmnRNpmS/pSnwFfNTv1KKNBw3B3zryDAw==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-01T13:42:40.023675Z","bundle_sha256":"3726da19cf0ce5a50cbc6779cfae5323780825ae9a277816825c3b1834e8d4b7"}}