{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2018:WRJVZ3OQIEDMET5P3X55DTLD4F","short_pith_number":"pith:WRJVZ3OQ","schema_version":"1.0","canonical_sha256":"b4535cedd04106c24fafddfbd1cd63e1435ded156f187f8ecb726b1117445d30","source":{"kind":"arxiv","id":"1802.06093","version":4},"attestation_state":"computed","paper":{"title":"Gradient descent with identity initialization efficiently learns positive definite linear transformations by deep residual networks","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.NE","math.OC","math.ST","stat.ML","stat.TH"],"primary_cat":"cs.LG","authors_text":"David P. Helmbold, Peter L. Bartlett, Philip M. Long","submitted_at":"2018-02-16T19:24:29Z","abstract_excerpt":"We analyze algorithms for approximating a function $f(x) = \\Phi x$ mapping $\\Re^d$ to $\\Re^d$ using deep linear neural networks, i.e. that learn a function $h$ parameterized by matrices $\\Theta_1,...,\\Theta_L$ and defined by $h(x) = \\Theta_L \\Theta_{L-1} ... \\Theta_1 x$. We focus on algorithms that learn through gradient descent on the population quadratic loss in the case that the distribution over the inputs is isotropic.\n  We provide polynomial bounds on the number of iterations for gradient descent to approximate the least squares matrix $\\Phi$, in the case where the initial hypothesis $\\T"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1802.06093","kind":"arxiv","version":4},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-02-16T19:24:29Z","cross_cats_sorted":["cs.NE","math.OC","math.ST","stat.ML","stat.TH"],"title_canon_sha256":"4c0d386ac65498bf2fe456cbb084fe30bc038a11bec3c44d211ed729d9593c49","abstract_canon_sha256":"6ef943f7f5b25ca9fa32b385a2aa34ff239f0aed74d7ac8b81ed20f0970b2f73"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:13:04.625445Z","signature_b64":"C38danIAQkn021904BPWlZ0IlKtLkKfPnDBr58DWAjRzcRkO5mQhnJl4Nqe3AHKx29JsmzvS14/KQzxhKKaTCg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"b4535cedd04106c24fafddfbd1cd63e1435ded156f187f8ecb726b1117445d30","last_reissued_at":"2026-05-18T00:13:04.624728Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:13:04.624728Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Gradient descent with identity initialization efficiently learns positive definite linear transformations by deep residual networks","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.NE","math.OC","math.ST","stat.ML","stat.TH"],"primary_cat":"cs.LG","authors_text":"David P. Helmbold, Peter L. Bartlett, Philip M. Long","submitted_at":"2018-02-16T19:24:29Z","abstract_excerpt":"We analyze algorithms for approximating a function $f(x) = \\Phi x$ mapping $\\Re^d$ to $\\Re^d$ using deep linear neural networks, i.e. that learn a function $h$ parameterized by matrices $\\Theta_1,...,\\Theta_L$ and defined by $h(x) = \\Theta_L \\Theta_{L-1} ... \\Theta_1 x$. We focus on algorithms that learn through gradient descent on the population quadratic loss in the case that the distribution over the inputs is isotropic.\n  We provide polynomial bounds on the number of iterations for gradient descent to approximate the least squares matrix $\\Phi$, in the case where the initial hypothesis $\\T"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1802.06093","kind":"arxiv","version":4},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1802.06093","created_at":"2026-05-18T00:13:04.624850+00:00"},{"alias_kind":"arxiv_version","alias_value":"1802.06093v4","created_at":"2026-05-18T00:13:04.624850+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1802.06093","created_at":"2026-05-18T00:13:04.624850+00:00"},{"alias_kind":"pith_short_12","alias_value":"WRJVZ3OQIEDM","created_at":"2026-05-18T12:33:01.666342+00:00"},{"alias_kind":"pith_short_16","alias_value":"WRJVZ3OQIEDMET5P","created_at":"2026-05-18T12:33:01.666342+00:00"},{"alias_kind":"pith_short_8","alias_value":"WRJVZ3OQ","created_at":"2026-05-18T12:33:01.666342+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/WRJVZ3OQIEDMET5P3X55DTLD4F","json":"https://pith.science/pith/WRJVZ3OQIEDMET5P3X55DTLD4F.json","graph_json":"https://pith.science/api/pith-number/WRJVZ3OQIEDMET5P3X55DTLD4F/graph.json","events_json":"https://pith.science/api/pith-number/WRJVZ3OQIEDMET5P3X55DTLD4F/events.json","paper":"https://pith.science/paper/WRJVZ3OQ"},"agent_actions":{"view_html":"https://pith.science/pith/WRJVZ3OQIEDMET5P3X55DTLD4F","download_json":"https://pith.science/pith/WRJVZ3OQIEDMET5P3X55DTLD4F.json","view_paper":"https://pith.science/paper/WRJVZ3OQ","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1802.06093&json=true","fetch_graph":"https://pith.science/api/pith-number/WRJVZ3OQIEDMET5P3X55DTLD4F/graph.json","fetch_events":"https://pith.science/api/pith-number/WRJVZ3OQIEDMET5P3X55DTLD4F/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/WRJVZ3OQIEDMET5P3X55DTLD4F/action/timestamp_anchor","attest_storage":"https://pith.science/pith/WRJVZ3OQIEDMET5P3X55DTLD4F/action/storage_attestation","attest_author":"https://pith.science/pith/WRJVZ3OQIEDMET5P3X55DTLD4F/action/author_attestation","sign_citation":"https://pith.science/pith/WRJVZ3OQIEDMET5P3X55DTLD4F/action/citation_signature","submit_replication":"https://pith.science/pith/WRJVZ3OQIEDMET5P3X55DTLD4F/action/replication_record"}},"created_at":"2026-05-18T00:13:04.624850+00:00","updated_at":"2026-05-18T00:13:04.624850+00:00"}