{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2018:ILK7VY6J7ZIUPP4333ZMLUDFCV","short_pith_number":"pith:ILK7VY6J","schema_version":"1.0","canonical_sha256":"42d5fae3c9fe5147bf9bdef2c5d065154555329e5ab2e379c17ab4bf747f2eb6","source":{"kind":"arxiv","id":"1806.00582","version":2},"attestation_state":"computed","paper":{"title":"Federated Learning with Non-IID Data","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"A small globally shared data subset recovers up to 30% accuracy lost to non-IID data in federated learning.","cross_cats":["stat.ML"],"primary_cat":"cs.LG","authors_text":"Damon Civin, Liangzhen Lai, Meng Li, Naveen Suda, Vikas Chandra, Yue Zhao","submitted_at":"2018-06-02T04:45:58Z","abstract_excerpt":"Federated learning enables resource-constrained edge compute devices, such as mobile phones and IoT devices, to learn a shared model for prediction, while keeping the training data local. This decentralized approach to train models provides privacy, security, regulatory and economic benefits. In this work, we focus on the statistical challenge of federated learning when local data is non-IID. We first show that the accuracy of federated learning reduces significantly, by up to 55% for neural networks trained for highly skewed non-IID data, where each client device trains only on a single class"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":true,"formal_links_present":true},"canonical_record":{"source":{"id":"1806.00582","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-06-02T04:45:58Z","cross_cats_sorted":["stat.ML"],"title_canon_sha256":"60793869653bc39cf520f01906c6876ef56ef521c0e3466b865e029107c2ebac","abstract_canon_sha256":"9966f33a42ee725d7f8da443d95995915eee321865583111c457d6b57714106f"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:38:48.205124Z","signature_b64":"fp2aEv52+SW+EiIbFWl/RI8eTQqVDPrV6uIfN88I/kdMx9E802p1fHi7XpeUE6rp59ACWFd8YZZCU22GRQOjAg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"42d5fae3c9fe5147bf9bdef2c5d065154555329e5ab2e379c17ab4bf747f2eb6","last_reissued_at":"2026-05-17T23:38:48.204579Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:38:48.204579Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Federated Learning with Non-IID Data","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"A small globally shared data subset recovers up to 30% accuracy lost to non-IID data in federated learning.","cross_cats":["stat.ML"],"primary_cat":"cs.LG","authors_text":"Damon Civin, Liangzhen Lai, Meng Li, Naveen Suda, Vikas Chandra, Yue Zhao","submitted_at":"2018-06-02T04:45:58Z","abstract_excerpt":"Federated learning enables resource-constrained edge compute devices, such as mobile phones and IoT devices, to learn a shared model for prediction, while keeping the training data local. This decentralized approach to train models provides privacy, security, regulatory and economic benefits. In this work, we focus on the statistical challenge of federated learning when local data is non-IID. We first show that the accuracy of federated learning reduces significantly, by up to 55% for neural networks trained for highly skewed non-IID data, where each client device trains only on a single class"},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"Experiments show that accuracy can be increased by 30% for the CIFAR-10 dataset with only 5% globally shared data.","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"That a small globally shared data subset can be created and distributed without violating the privacy or regulatory constraints that motivated federated learning in the first place.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"Non-IID data causes up to 55% accuracy loss in federated learning due to weight divergence measured by earth mover's distance; 5% globally shared data recovers 30% accuracy on CIFAR-10.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"A small globally shared data subset recovers up to 30% accuracy lost to non-IID data in federated learning.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"4d192665be16f3de12614d01db1dd4cd4b577b3617b9a0fb002004845867b41a"},"source":{"id":"1806.00582","kind":"arxiv","version":2},"verdict":{"id":"dd3194bb-6fde-4875-8df5-9dfc510b6c46","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-16T10:18:46.227951Z","strongest_claim":"Experiments show that accuracy can be increased by 30% for the CIFAR-10 dataset with only 5% globally shared data.","one_line_summary":"Non-IID data causes up to 55% accuracy loss in federated learning due to weight divergence measured by earth mover's distance; 5% globally shared data recovers 30% accuracy on CIFAR-10.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"That a small globally shared data subset can be created and distributed without violating the privacy or regulatory constraints that motivated federated learning in the first place.","pith_extraction_headline":"A small globally shared data subset recovers up to 30% accuracy lost to non-IID data in federated learning."},"references":{"count":23,"sample":[{"doi":"","year":2017,"title":"Hello edge: Keyword spotting on microcontrollers","work_id":"1ea17269-91bf-43a4-af26-7fe2e77c41b8","ref_index":1,"cited_arxiv_id":"1711.07128","is_internal_anchor":true},{"doi":"","year":2018,"title":"CMSIS-NN: Efficient neural network kernels for ARM Cortex-M CPUs","work_id":"69ae0df4-8381-4f4e-b7e4-79c11ea7c479","ref_index":2,"cited_arxiv_id":"1801.06601","is_internal_anchor":true},{"doi":"","year":2017,"title":"Communication-efﬁcient learning of deep networks from decentralized data,","work_id":"bb22376b-52c1-4ec6-9926-1ccb63000b6c","ref_index":3,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2015,"title":"Federated optimization: Dis- tributed optimization beyond the datacenter.arXiv preprint arXiv:1511.03575","work_id":"73185f03-b92c-4266-9a44-e41bc13da684","ref_index":4,"cited_arxiv_id":"1511.03575","is_internal_anchor":true},{"doi":"","year":2017,"title":"Federated learning: Collaborative machine learning without centralized training data,","work_id":"61b390f8-4278-4f67-848f-3969353fdb87","ref_index":5,"cited_arxiv_id":"","is_internal_anchor":false}],"resolved_work":23,"snapshot_sha256":"0a271ef58e65e7f6dad1cb2226b9c8c68b8be137b7b2ab5dbc6986a54563db78","internal_anchors":7},"formal_canon":{"evidence_count":2,"snapshot_sha256":"79f3f6428a3592f16c2cf33dccfcc98a747186dc9e575caa6ece74d996476b7e"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1806.00582","created_at":"2026-05-17T23:38:48.204669+00:00"},{"alias_kind":"arxiv_version","alias_value":"1806.00582v2","created_at":"2026-05-17T23:38:48.204669+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1806.00582","created_at":"2026-05-17T23:38:48.204669+00:00"},{"alias_kind":"pith_short_12","alias_value":"ILK7VY6J7ZIU","created_at":"2026-05-18T12:32:31.084164+00:00"},{"alias_kind":"pith_short_16","alias_value":"ILK7VY6J7ZIUPP43","created_at":"2026-05-18T12:32:31.084164+00:00"},{"alias_kind":"pith_short_8","alias_value":"ILK7VY6J","created_at":"2026-05-18T12:32:31.084164+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":31,"internal_anchor_count":31,"sample":[{"citing_arxiv_id":"2605.22898","citing_title":"FIRMA: FIbonacci Ring Model Aggregation for Privacy-preserving Federated Learning","ref_index":17,"is_internal_anchor":true},{"citing_arxiv_id":"2406.10861","citing_title":"Knowledge Distillation in Federated Learning: a Survey on Long Lasting Challenges and New Solutions","ref_index":188,"is_internal_anchor":true},{"citing_arxiv_id":"2505.12318","citing_title":"Task-agnostic Low-rank Residual Adaptation for Efficient Federated Continual Fine-Tuning","ref_index":56,"is_internal_anchor":true},{"citing_arxiv_id":"2605.20975","citing_title":"Choose Wisely and Privately: Proactive Client Selection for Fair and Efficient Federated Learning","ref_index":36,"is_internal_anchor":true},{"citing_arxiv_id":"2512.13647","citing_title":"REVERB-FL: Server-Side Adversarial and Reserve-Enhanced Federated Learning for Robust Audio Classification","ref_index":38,"is_internal_anchor":true},{"citing_arxiv_id":"2605.20975","citing_title":"Choose Wisely and Privately: Proactive Client Selection for Fair and Efficient Federated Learning","ref_index":36,"is_internal_anchor":true},{"citing_arxiv_id":"2605.20866","citing_title":"LOSCAR-SGD: Local SGD with Communication-Computation Overlap and Delay-Corrected Sparse Model Averaging","ref_index":62,"is_internal_anchor":true},{"citing_arxiv_id":"2605.18174","citing_title":"Ringmaster LMO: Asynchronous Linear Minimization Oracle Momentum Method","ref_index":60,"is_internal_anchor":true},{"citing_arxiv_id":"2605.18028","citing_title":"FedSDR: Federated Self-Distillation with Rectification","ref_index":2,"is_internal_anchor":true},{"citing_arxiv_id":"2605.15520","citing_title":"On the Fragility of Data Attribution When Learning Is Distributed","ref_index":35,"is_internal_anchor":true},{"citing_arxiv_id":"2509.18367","citing_title":"Multi-Worker Selection based Distributed Swarm Learning for Edge IoT with Non-i.i.d. Data","ref_index":11,"is_internal_anchor":true},{"citing_arxiv_id":"1909.06335","citing_title":"Measuring the Effects of Non-Identical Data Distribution for Federated Visual Classification","ref_index":12,"is_internal_anchor":true},{"citing_arxiv_id":"2602.00407","citing_title":"Fed-Listing: Federated Label Distribution Inference in Graph Neural Networks","ref_index":36,"is_internal_anchor":true},{"citing_arxiv_id":"2603.03853","citing_title":"Practical Quantum Federated Learning for Privacy-Sensitive Healthcare: Communication Efficiency and Noise Resilience","ref_index":15,"is_internal_anchor":true},{"citing_arxiv_id":"2603.11307","citing_title":"Client-Conditional Federated Learning via Local Training Data Statistics","ref_index":5,"is_internal_anchor":true},{"citing_arxiv_id":"2605.11122","citing_title":"FedSurrogate: Backdoor Defense in Federated Learning via Layer Criticality and Surrogate Replacement","ref_index":28,"is_internal_anchor":true},{"citing_arxiv_id":"2604.27510","citing_title":"FMCL: Class-Aware Client Clustering with Foundation Model Representations for Heterogeneous Federated Learning","ref_index":16,"is_internal_anchor":true},{"citing_arxiv_id":"2605.08871","citing_title":"Rennala MVR: Improved Time Complexity for Parallel Stochastic Optimization via Momentum-Based Variance Reduction","ref_index":58,"is_internal_anchor":true},{"citing_arxiv_id":"2605.02169","citing_title":"Heterogeneous Model Fusion for Privacy-Aware Multi-Camera Surveillance via Synthetic Domain Adaptation","ref_index":15,"is_internal_anchor":true},{"citing_arxiv_id":"2605.09144","citing_title":"FedVSSAM: Mitigating Flatness Incompatibility in Sharpness-Aware Federated Learning","ref_index":63,"is_internal_anchor":true},{"citing_arxiv_id":"2605.09137","citing_title":"Evaluating Federated Learning approaches for mammography under breast density heterogeneity","ref_index":33,"is_internal_anchor":true},{"citing_arxiv_id":"2605.08152","citing_title":"Privacy-Preserving Federated Learning: Integrating Zero-Knowledge Proofs in Scalable Distributed Architectures","ref_index":29,"is_internal_anchor":true},{"citing_arxiv_id":"2605.06571","citing_title":"CLAD: A Clustered Label-Agnostic Federated Learning Framework for Joint Anomaly Detection and Attack Classification","ref_index":10,"is_internal_anchor":true},{"citing_arxiv_id":"2605.02169","citing_title":"Heterogeneous Model Fusion for Privacy-Aware Multi-Camera Surveillance via Synthetic Domain Adaptation","ref_index":15,"is_internal_anchor":true},{"citing_arxiv_id":"2605.00698","citing_title":"FedKPer: Tackling Generalization and Personalization in Medical Federated Learning via Knowledge Personalization","ref_index":11,"is_internal_anchor":true}]},"formal_canon":{"evidence_count":2,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/ILK7VY6J7ZIUPP4333ZMLUDFCV","json":"https://pith.science/pith/ILK7VY6J7ZIUPP4333ZMLUDFCV.json","graph_json":"https://pith.science/api/pith-number/ILK7VY6J7ZIUPP4333ZMLUDFCV/graph.json","events_json":"https://pith.science/api/pith-number/ILK7VY6J7ZIUPP4333ZMLUDFCV/events.json","paper":"https://pith.science/paper/ILK7VY6J"},"agent_actions":{"view_html":"https://pith.science/pith/ILK7VY6J7ZIUPP4333ZMLUDFCV","download_json":"https://pith.science/pith/ILK7VY6J7ZIUPP4333ZMLUDFCV.json","view_paper":"https://pith.science/paper/ILK7VY6J","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1806.00582&json=true","fetch_graph":"https://pith.science/api/pith-number/ILK7VY6J7ZIUPP4333ZMLUDFCV/graph.json","fetch_events":"https://pith.science/api/pith-number/ILK7VY6J7ZIUPP4333ZMLUDFCV/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/ILK7VY6J7ZIUPP4333ZMLUDFCV/action/timestamp_anchor","attest_storage":"https://pith.science/pith/ILK7VY6J7ZIUPP4333ZMLUDFCV/action/storage_attestation","attest_author":"https://pith.science/pith/ILK7VY6J7ZIUPP4333ZMLUDFCV/action/author_attestation","sign_citation":"https://pith.science/pith/ILK7VY6J7ZIUPP4333ZMLUDFCV/action/citation_signature","submit_replication":"https://pith.science/pith/ILK7VY6J7ZIUPP4333ZMLUDFCV/action/replication_record"}},"created_at":"2026-05-17T23:38:48.204669+00:00","updated_at":"2026-05-17T23:38:48.204669+00:00"}