{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2019:VZAZBDIC752UTMEZASMQWQ7JBA","short_pith_number":"pith:VZAZBDIC","schema_version":"1.0","canonical_sha256":"ae41908d02ff7549b09904990b43e908350564c0858783c17d226d1d1b63b6ae","source":{"kind":"arxiv","id":"1909.06335","version":1},"attestation_state":"computed","paper":{"title":"Measuring the Effects of Non-Identical Data Distribution for Federated Visual Classification","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"Non-identical data distributions degrade federated averaging performance on visual tasks, but server momentum recovers most of the accuracy loss.","cross_cats":["cs.CV","stat.ML"],"primary_cat":"cs.LG","authors_text":"Hang Qi, Matthew Brown, Tzu-Ming Harry Hsu","submitted_at":"2019-09-13T17:26:20Z","abstract_excerpt":"Federated Learning enables visual models to be trained in a privacy-preserving way using real-world data from mobile devices. Given their distributed nature, the statistics of the data across these devices is likely to differ significantly. In this work, we look at the effect such non-identical data distributions has on visual classification via Federated Learning. We propose a way to synthesize datasets with a continuous range of identicalness and provide performance measures for the Federated Averaging algorithm. We show that performance degrades as distributions differ more, and propose a m"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":true,"formal_links_present":true},"canonical_record":{"source":{"id":"1909.06335","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-09-13T17:26:20Z","cross_cats_sorted":["cs.CV","stat.ML"],"title_canon_sha256":"bdf8dc665285f408ac522b864cb845415d7e8dfc30ccbca9450d38eaf2efea69","abstract_canon_sha256":"ea16202aaf1c40a1a276d37b9147b273854430fed81d2d85e909613bdca48229"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:38:13.505205Z","signature_b64":"yWhnESu+nQDqo9vEj5Zr4qeoTHFYfiiAbYfsrbD/XGYdtxPcDOGUwVYNnC90MZE9oBuKxkVyuN4UNK6YiJc4Ag==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"ae41908d02ff7549b09904990b43e908350564c0858783c17d226d1d1b63b6ae","last_reissued_at":"2026-05-17T23:38:13.504555Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:38:13.504555Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Measuring the Effects of Non-Identical Data Distribution for Federated Visual Classification","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"Non-identical data distributions degrade federated averaging performance on visual tasks, but server momentum recovers most of the accuracy loss.","cross_cats":["cs.CV","stat.ML"],"primary_cat":"cs.LG","authors_text":"Hang Qi, Matthew Brown, Tzu-Ming Harry Hsu","submitted_at":"2019-09-13T17:26:20Z","abstract_excerpt":"Federated Learning enables visual models to be trained in a privacy-preserving way using real-world data from mobile devices. Given their distributed nature, the statistics of the data across these devices is likely to differ significantly. In this work, we look at the effect such non-identical data distributions has on visual classification via Federated Learning. We propose a way to synthesize datasets with a continuous range of identicalness and provide performance measures for the Federated Averaging algorithm. We show that performance degrades as distributions differ more, and propose a m"},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"Experiments on CIFAR-10 demonstrate improved classification performance over a range of non-identicalness, with classification accuracy improved from 30.1% to 76.9% in the most skewed settings.","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"The synthetic non-identical datasets created by the authors accurately capture the statistical heterogeneity present in real-world federated visual data collected from mobile devices.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"Non-identical data distributions degrade federated averaging accuracy on visual classification, but server momentum raises CIFAR-10 accuracy from 30.1% to 76.9% in the most skewed regimes.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"Non-identical data distributions degrade federated averaging performance on visual tasks, but server momentum recovers most of the accuracy loss.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"d6591ad3cd2b5c79480a8529ec8a44c8015e707520b670e4ef3ac7750f654ef6"},"source":{"id":"1909.06335","kind":"arxiv","version":1},"verdict":{"id":"fcaa14a9-8382-4729-9e94-dcfe37dd012a","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-17T17:31:04.648279Z","strongest_claim":"Experiments on CIFAR-10 demonstrate improved classification performance over a range of non-identicalness, with classification accuracy improved from 30.1% to 76.9% in the most skewed settings.","one_line_summary":"Non-identical data distributions degrade federated averaging accuracy on visual classification, but server momentum raises CIFAR-10 accuracy from 30.1% to 76.9% in the most skewed regimes.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"The synthetic non-identical datasets created by the authors accurately capture the statistical heterogeneity present in real-world federated visual data collected from mobile devices.","pith_extraction_headline":"Non-identical data distributions degrade federated averaging performance on visual tasks, but server momentum recovers most of the accuracy loss."},"references":{"count":20,"sample":[{"doi":"","year":2009,"title":"Learning multiple layers of features from tiny images","work_id":"533d9d88-54d2-4699-9f13-27ab1746339c","ref_index":3,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":1907,"title":"International Conference on Learning Representations , year =","work_id":"74e5da63-cffe-4f1a-a090-e0b98d805e10","ref_index":4,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2017,"title":"Communication-efficient learning of deep networks from decentralized data","work_id":"5c6a6545-af99-43fb-8083-08ceaf806562","ref_index":5,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2007,"title":"Gradient methods for minimizing composite objective function","work_id":"82d58aed-7045-4ac7-af05-ef90cac9ee7a","ref_index":6,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":null,"title":"Advanced convolutional neural networks","work_id":"d468776e-70d7-4ba4-8062-11555e000593","ref_index":10,"cited_arxiv_id":"","is_internal_anchor":false}],"resolved_work":20,"snapshot_sha256":"a08ef1b4d83b678b3b7b3048f52054dcdb4ffab61ea2f38ff89eb0afdab3535f","internal_anchors":4},"formal_canon":{"evidence_count":1,"snapshot_sha256":"aad5c54d1e883eb2db41b17bbc2eb572fa9028591bfb11511d29f679aa60ca68"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1909.06335","created_at":"2026-05-17T23:38:13.504643+00:00"},{"alias_kind":"arxiv_version","alias_value":"1909.06335v1","created_at":"2026-05-17T23:38:13.504643+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1909.06335","created_at":"2026-05-17T23:38:13.504643+00:00"},{"alias_kind":"pith_short_12","alias_value":"VZAZBDIC752U","created_at":"2026-05-18T12:33:30.264802+00:00"},{"alias_kind":"pith_short_16","alias_value":"VZAZBDIC752UTMEZ","created_at":"2026-05-18T12:33:30.264802+00:00"},{"alias_kind":"pith_short_8","alias_value":"VZAZBDIC","created_at":"2026-05-18T12:33:30.264802+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":19,"internal_anchor_count":19,"sample":[{"citing_arxiv_id":"2507.06542","citing_title":"On the Surprising Effectiveness of a Single Global Merging in Decentralized Learning","ref_index":35,"is_internal_anchor":true},{"citing_arxiv_id":"2508.05663","citing_title":"Random Walk Learning and the Pac-Man Attack","ref_index":34,"is_internal_anchor":true},{"citing_arxiv_id":"2509.18367","citing_title":"Multi-Worker Selection based Distributed Swarm Learning for Edge IoT with Non-i.i.d. Data","ref_index":6,"is_internal_anchor":true},{"citing_arxiv_id":"2512.12022","citing_title":"DFedReweighting: A Unified Framework for Objective-Oriented Reweighting in Decentralized Federated Learning","ref_index":16,"is_internal_anchor":true},{"citing_arxiv_id":"2601.15127","citing_title":"DeepFedNAS: Efficient Hardware-Aware Architecture Adaptation for Heterogeneous IoT Federations via Pareto-Guided Supernet Training","ref_index":51,"is_internal_anchor":true},{"citing_arxiv_id":"2602.13529","citing_title":"SecureGate: Learning When to Reveal PII Safely via Token-Gated Dual-Adapters for Federated LLMs","ref_index":2,"is_internal_anchor":true},{"citing_arxiv_id":"2602.19945","citing_title":"DP-FedAdamW: An Efficient Optimizer for Differentially Private Federated Large Models","ref_index":25,"is_internal_anchor":true},{"citing_arxiv_id":"2602.23827","citing_title":"FedNSAM:Consistency of Local and Global Flatness for Federated Learning","ref_index":12,"is_internal_anchor":true},{"citing_arxiv_id":"2603.05116","citing_title":"FedBCD:Communication-Efficient Accelerated Block Coordinate Gradient Descent for Federated Learning","ref_index":16,"is_internal_anchor":true},{"citing_arxiv_id":"2605.13434","citing_title":"Rescaled Asynchronous SGD: Optimal Distributed Optimization under Data and System Heterogeneity","ref_index":2,"is_internal_anchor":true},{"citing_arxiv_id":"2605.11010","citing_title":"A Comparative Study of Federated Learning Aggregation Strategies under Homogeneous and Heterogeneous Data Distributions","ref_index":11,"is_internal_anchor":true},{"citing_arxiv_id":"2604.26809","citing_title":"Asynchronous Federated Unlearning with Invariance Calibration for Medical Imaging","ref_index":28,"is_internal_anchor":true},{"citing_arxiv_id":"2605.09144","citing_title":"FedVSSAM: Mitigating Flatness Incompatibility in Sharpness-Aware Federated Learning","ref_index":18,"is_internal_anchor":true},{"citing_arxiv_id":"2605.08992","citing_title":"When More Parameters Hurt: Foundation Model Priors Amplify Worst-Client Disparity Under Extreme Federated Heterogeneity","ref_index":7,"is_internal_anchor":true},{"citing_arxiv_id":"2604.22885","citing_title":"Federated Cross-Modal Retrieval with Missing Modalities via Semantic Routing and Adapter Personalization","ref_index":14,"is_internal_anchor":true},{"citing_arxiv_id":"2605.01061","citing_title":"PRISM: Exposing and Resolving Spurious Isolation in Federated Multimodal Continual Learning","ref_index":46,"is_internal_anchor":true},{"citing_arxiv_id":"2604.12160","citing_title":"PubSwap: Public-Data Off-Policy Coordination for Federated RLVR","ref_index":6,"is_internal_anchor":true},{"citing_arxiv_id":"2604.12768","citing_title":"Rethinking the Personalized Relaxed Initialization in the Federated Learning: Consistency and Generalization","ref_index":5,"is_internal_anchor":true},{"citing_arxiv_id":"2604.14956","citing_title":"FedGUI: Benchmarking Federated GUI Agents across Heterogeneous Platforms, Devices, and Operating Systems","ref_index":1,"is_internal_anchor":true}]},"formal_canon":{"evidence_count":1,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/VZAZBDIC752UTMEZASMQWQ7JBA","json":"https://pith.science/pith/VZAZBDIC752UTMEZASMQWQ7JBA.json","graph_json":"https://pith.science/api/pith-number/VZAZBDIC752UTMEZASMQWQ7JBA/graph.json","events_json":"https://pith.science/api/pith-number/VZAZBDIC752UTMEZASMQWQ7JBA/events.json","paper":"https://pith.science/paper/VZAZBDIC"},"agent_actions":{"view_html":"https://pith.science/pith/VZAZBDIC752UTMEZASMQWQ7JBA","download_json":"https://pith.science/pith/VZAZBDIC752UTMEZASMQWQ7JBA.json","view_paper":"https://pith.science/paper/VZAZBDIC","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1909.06335&json=true","fetch_graph":"https://pith.science/api/pith-number/VZAZBDIC752UTMEZASMQWQ7JBA/graph.json","fetch_events":"https://pith.science/api/pith-number/VZAZBDIC752UTMEZASMQWQ7JBA/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/VZAZBDIC752UTMEZASMQWQ7JBA/action/timestamp_anchor","attest_storage":"https://pith.science/pith/VZAZBDIC752UTMEZASMQWQ7JBA/action/storage_attestation","attest_author":"https://pith.science/pith/VZAZBDIC752UTMEZASMQWQ7JBA/action/author_attestation","sign_citation":"https://pith.science/pith/VZAZBDIC752UTMEZASMQWQ7JBA/action/citation_signature","submit_replication":"https://pith.science/pith/VZAZBDIC752UTMEZASMQWQ7JBA/action/replication_record"}},"created_at":"2026-05-17T23:38:13.504643+00:00","updated_at":"2026-05-17T23:38:13.504643+00:00"}