{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2016:VUYQU7AUG7MSHV7O2L5HDDMKL5","short_pith_number":"pith:VUYQU7AU","schema_version":"1.0","canonical_sha256":"ad310a7c1437d923d7eed2fa718d8a5f5e3e2ed7331db1de5e985c02c00c859e","source":{"kind":"arxiv","id":"1605.08361","version":2},"attestation_state":"computed","paper":{"title":"No bad local minima: Data independent training error guarantees for multilayer neural networks","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.LG","cs.NE"],"primary_cat":"stat.ML","authors_text":"Daniel Soudry, Yair Carmon","submitted_at":"2016-05-26T16:51:05Z","abstract_excerpt":"We use smoothed analysis techniques to provide guarantees on the training loss of Multilayer Neural Networks (MNNs) at differentiable local minima. Specifically, we examine MNNs with piecewise linear activation functions, quadratic loss and a single output, under mild over-parametrization. We prove that for a MNN with one hidden layer, the training error is zero at every differentiable local minimum, for almost every dataset and dropout-like noise realization. We then extend these results to the case of more than one hidden layer. Our theoretical guarantees assume essentially nothing on the tr"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1605.08361","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"stat.ML","submitted_at":"2016-05-26T16:51:05Z","cross_cats_sorted":["cs.LG","cs.NE"],"title_canon_sha256":"61faf19b686241be52a49eadb62c62122967db2b0fa5c3487f5f31cd892b8c82","abstract_canon_sha256":"8157d6baf2d2c7991845e204eb9ea21bd74e1598ae466d242124c68afd5df48c"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T01:13:23.123799Z","signature_b64":"HW5mKQeEOAOsZN5fi/v0Lvk9NDDgdOxs2JJZhl7zlfieQrtl7ql+TEnUhoo21fKMzA8H4YHqNqf0WFUglQtrDQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"ad310a7c1437d923d7eed2fa718d8a5f5e3e2ed7331db1de5e985c02c00c859e","last_reissued_at":"2026-05-18T01:13:23.123289Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T01:13:23.123289Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"No bad local minima: Data independent training error guarantees for multilayer neural networks","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.LG","cs.NE"],"primary_cat":"stat.ML","authors_text":"Daniel Soudry, Yair Carmon","submitted_at":"2016-05-26T16:51:05Z","abstract_excerpt":"We use smoothed analysis techniques to provide guarantees on the training loss of Multilayer Neural Networks (MNNs) at differentiable local minima. Specifically, we examine MNNs with piecewise linear activation functions, quadratic loss and a single output, under mild over-parametrization. We prove that for a MNN with one hidden layer, the training error is zero at every differentiable local minimum, for almost every dataset and dropout-like noise realization. We then extend these results to the case of more than one hidden layer. Our theoretical guarantees assume essentially nothing on the tr"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1605.08361","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1605.08361","created_at":"2026-05-18T01:13:23.123377+00:00"},{"alias_kind":"arxiv_version","alias_value":"1605.08361v2","created_at":"2026-05-18T01:13:23.123377+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1605.08361","created_at":"2026-05-18T01:13:23.123377+00:00"},{"alias_kind":"pith_short_12","alias_value":"VUYQU7AUG7MS","created_at":"2026-05-18T12:30:48.956258+00:00"},{"alias_kind":"pith_short_16","alias_value":"VUYQU7AUG7MSHV7O","created_at":"2026-05-18T12:30:48.956258+00:00"},{"alias_kind":"pith_short_8","alias_value":"VUYQU7AU","created_at":"2026-05-18T12:30:48.956258+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":5,"internal_anchor_count":4,"sample":[{"citing_arxiv_id":"1907.00485","citing_title":"Robust and Resource Efficient Identification of Two Hidden Layer Neural Networks","ref_index":57,"is_internal_anchor":true},{"citing_arxiv_id":"1907.02911","citing_title":"Weight-space symmetry in deep networks gives rise to permutation saddles, connected by equal-loss valleys across the loss landscape","ref_index":19,"is_internal_anchor":true},{"citing_arxiv_id":"2506.14951","citing_title":"Flat Channels to Infinity in Neural Loss Landscapes","ref_index":16,"is_internal_anchor":true},{"citing_arxiv_id":"2401.01335","citing_title":"Self-Play Fine-Tuning Converts Weak Language Models to Strong Language Models","ref_index":111,"is_internal_anchor":true},{"citing_arxiv_id":"1609.04836","citing_title":"On Large-Batch Training for Deep Learning: Generalization Gap and Sharp Minima","ref_index":13,"is_internal_anchor":false}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/VUYQU7AUG7MSHV7O2L5HDDMKL5","json":"https://pith.science/pith/VUYQU7AUG7MSHV7O2L5HDDMKL5.json","graph_json":"https://pith.science/api/pith-number/VUYQU7AUG7MSHV7O2L5HDDMKL5/graph.json","events_json":"https://pith.science/api/pith-number/VUYQU7AUG7MSHV7O2L5HDDMKL5/events.json","paper":"https://pith.science/paper/VUYQU7AU"},"agent_actions":{"view_html":"https://pith.science/pith/VUYQU7AUG7MSHV7O2L5HDDMKL5","download_json":"https://pith.science/pith/VUYQU7AUG7MSHV7O2L5HDDMKL5.json","view_paper":"https://pith.science/paper/VUYQU7AU","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1605.08361&json=true","fetch_graph":"https://pith.science/api/pith-number/VUYQU7AUG7MSHV7O2L5HDDMKL5/graph.json","fetch_events":"https://pith.science/api/pith-number/VUYQU7AUG7MSHV7O2L5HDDMKL5/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/VUYQU7AUG7MSHV7O2L5HDDMKL5/action/timestamp_anchor","attest_storage":"https://pith.science/pith/VUYQU7AUG7MSHV7O2L5HDDMKL5/action/storage_attestation","attest_author":"https://pith.science/pith/VUYQU7AUG7MSHV7O2L5HDDMKL5/action/author_attestation","sign_citation":"https://pith.science/pith/VUYQU7AUG7MSHV7O2L5HDDMKL5/action/citation_signature","submit_replication":"https://pith.science/pith/VUYQU7AUG7MSHV7O2L5HDDMKL5/action/replication_record"}},"created_at":"2026-05-18T01:13:23.123377+00:00","updated_at":"2026-05-18T01:13:23.123377+00:00"}