{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2018:MXKHSITZ4KR66MZBOF4ZERSWDQ","short_pith_number":"pith:MXKHSITZ","schema_version":"1.0","canonical_sha256":"65d4792279e2a3ef332171799246561c1575568b0394b64d00e5672819843571","source":{"kind":"arxiv","id":"1812.10004","version":1},"attestation_state":"computed","paper":{"title":"Overparameterized Nonlinear Learning: Gradient Descent Takes the Shortest Path?","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["math.OC","stat.ML"],"primary_cat":"cs.LG","authors_text":"Mahdi Soltanolkotabi, Samet Oymak","submitted_at":"2018-12-25T01:10:28Z","abstract_excerpt":"Many modern learning tasks involve fitting nonlinear models to data which are trained in an overparameterized regime where the parameters of the model exceed the size of the training dataset. Due to this overparameterization, the training loss may have infinitely many global minima and it is critical to understand the properties of the solutions found by first-order optimization schemes such as (stochastic) gradient descent starting from different initializations. In this paper we demonstrate that when the loss has certain properties over a minimally small neighborhood of the initial point, fi"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1812.10004","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-12-25T01:10:28Z","cross_cats_sorted":["math.OC","stat.ML"],"title_canon_sha256":"767e3a5d2d8c953bc5880a2e5124a0720ab594d71a1392c273b2a178dbdb7c31","abstract_canon_sha256":"64b49b5159f8e21c73afd6946b473146743404b05eb8859d6054748a8c0b44a3"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:57:25.310113Z","signature_b64":"uNMcbPeg7s5AnQPnMgFupYYpTWWpAxeClGuLBf7RsBENFHGyYk3PDwx3h4qYRXzEhYfw40NKqs/DyDIqoZ/nDw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"65d4792279e2a3ef332171799246561c1575568b0394b64d00e5672819843571","last_reissued_at":"2026-05-17T23:57:25.309508Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:57:25.309508Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Overparameterized Nonlinear Learning: Gradient Descent Takes the Shortest Path?","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["math.OC","stat.ML"],"primary_cat":"cs.LG","authors_text":"Mahdi Soltanolkotabi, Samet Oymak","submitted_at":"2018-12-25T01:10:28Z","abstract_excerpt":"Many modern learning tasks involve fitting nonlinear models to data which are trained in an overparameterized regime where the parameters of the model exceed the size of the training dataset. Due to this overparameterization, the training loss may have infinitely many global minima and it is critical to understand the properties of the solutions found by first-order optimization schemes such as (stochastic) gradient descent starting from different initializations. In this paper we demonstrate that when the loss has certain properties over a minimally small neighborhood of the initial point, fi"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1812.10004","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1812.10004","created_at":"2026-05-17T23:57:25.309598+00:00"},{"alias_kind":"arxiv_version","alias_value":"1812.10004v1","created_at":"2026-05-17T23:57:25.309598+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1812.10004","created_at":"2026-05-17T23:57:25.309598+00:00"},{"alias_kind":"pith_short_12","alias_value":"MXKHSITZ4KR6","created_at":"2026-05-18T12:32:40.477152+00:00"},{"alias_kind":"pith_short_16","alias_value":"MXKHSITZ4KR66MZB","created_at":"2026-05-18T12:32:40.477152+00:00"},{"alias_kind":"pith_short_8","alias_value":"MXKHSITZ","created_at":"2026-05-18T12:32:40.477152+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":2,"internal_anchor_count":1,"sample":[{"citing_arxiv_id":"1906.08654","citing_title":"ID3 Learns Juntas for Smoothed Product Distributions","ref_index":27,"is_internal_anchor":true},{"citing_arxiv_id":"2605.06959","citing_title":"Locally Near Optimal Piecewise Linear Regression in High Dimensions via Difference of Max-Affine Functions","ref_index":116,"is_internal_anchor":false}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/MXKHSITZ4KR66MZBOF4ZERSWDQ","json":"https://pith.science/pith/MXKHSITZ4KR66MZBOF4ZERSWDQ.json","graph_json":"https://pith.science/api/pith-number/MXKHSITZ4KR66MZBOF4ZERSWDQ/graph.json","events_json":"https://pith.science/api/pith-number/MXKHSITZ4KR66MZBOF4ZERSWDQ/events.json","paper":"https://pith.science/paper/MXKHSITZ"},"agent_actions":{"view_html":"https://pith.science/pith/MXKHSITZ4KR66MZBOF4ZERSWDQ","download_json":"https://pith.science/pith/MXKHSITZ4KR66MZBOF4ZERSWDQ.json","view_paper":"https://pith.science/paper/MXKHSITZ","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1812.10004&json=true","fetch_graph":"https://pith.science/api/pith-number/MXKHSITZ4KR66MZBOF4ZERSWDQ/graph.json","fetch_events":"https://pith.science/api/pith-number/MXKHSITZ4KR66MZBOF4ZERSWDQ/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/MXKHSITZ4KR66MZBOF4ZERSWDQ/action/timestamp_anchor","attest_storage":"https://pith.science/pith/MXKHSITZ4KR66MZBOF4ZERSWDQ/action/storage_attestation","attest_author":"https://pith.science/pith/MXKHSITZ4KR66MZBOF4ZERSWDQ/action/author_attestation","sign_citation":"https://pith.science/pith/MXKHSITZ4KR66MZBOF4ZERSWDQ/action/citation_signature","submit_replication":"https://pith.science/pith/MXKHSITZ4KR66MZBOF4ZERSWDQ/action/replication_record"}},"created_at":"2026-05-17T23:57:25.309598+00:00","updated_at":"2026-05-17T23:57:25.309598+00:00"}