{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2018:ZM263OOSTJUXGQXAVPZYVTLVMQ","short_pith_number":"pith:ZM263OOS","schema_version":"1.0","canonical_sha256":"cb35edb9d29a697342e0abf38acd75643e2adfa77a045c54797bd836b6374524","source":{"kind":"arxiv","id":"1806.00900","version":2},"attestation_state":"computed","paper":{"title":"Algorithmic Regularization in Learning Deep Homogeneous Models: Layers are Automatically Balanced","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["math.OC","stat.ML"],"primary_cat":"cs.LG","authors_text":"Jason D. Lee, Simon S. Du, Wei Hu","submitted_at":"2018-06-04T00:07:38Z","abstract_excerpt":"We study the implicit regularization imposed by gradient descent for learning multi-layer homogeneous functions including feed-forward fully connected and convolutional deep neural networks with linear, ReLU or Leaky ReLU activation. We rigorously prove that gradient flow (i.e. gradient descent with infinitesimal step size) effectively enforces the differences between squared norms across different layers to remain invariant without any explicit regularization. This result implies that if the weights are initially small, gradient flow automatically balances the magnitudes of all layers. Using "},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1806.00900","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-06-04T00:07:38Z","cross_cats_sorted":["math.OC","stat.ML"],"title_canon_sha256":"ed1bc22218b595041d06e9847cac1f04049c71000c7e2938a8101874befb6338","abstract_canon_sha256":"7219c01090fd3d5835952dd1707c5a5eb3d1c672d44c12609d6db37aeb2a298d"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:01:53.178265Z","signature_b64":"uRdZzGFGMqdnJAUAAwfImDx6GNhL+QEmGQMlhbU/x13zgnLbER+81talNluJTVTxSuEctcwTJBY+AcLcHYbwBw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"cb35edb9d29a697342e0abf38acd75643e2adfa77a045c54797bd836b6374524","last_reissued_at":"2026-05-18T00:01:53.177859Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:01:53.177859Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Algorithmic Regularization in Learning Deep Homogeneous Models: Layers are Automatically Balanced","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["math.OC","stat.ML"],"primary_cat":"cs.LG","authors_text":"Jason D. Lee, Simon S. Du, Wei Hu","submitted_at":"2018-06-04T00:07:38Z","abstract_excerpt":"We study the implicit regularization imposed by gradient descent for learning multi-layer homogeneous functions including feed-forward fully connected and convolutional deep neural networks with linear, ReLU or Leaky ReLU activation. We rigorously prove that gradient flow (i.e. gradient descent with infinitesimal step size) effectively enforces the differences between squared norms across different layers to remain invariant without any explicit regularization. This result implies that if the weights are initially small, gradient flow automatically balances the magnitudes of all layers. Using "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1806.00900","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1806.00900","created_at":"2026-05-18T00:01:53.177924+00:00"},{"alias_kind":"arxiv_version","alias_value":"1806.00900v2","created_at":"2026-05-18T00:01:53.177924+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1806.00900","created_at":"2026-05-18T00:01:53.177924+00:00"},{"alias_kind":"pith_short_12","alias_value":"ZM263OOSTJUX","created_at":"2026-05-18T12:33:07.085635+00:00"},{"alias_kind":"pith_short_16","alias_value":"ZM263OOSTJUXGQXA","created_at":"2026-05-18T12:33:07.085635+00:00"},{"alias_kind":"pith_short_8","alias_value":"ZM263OOS","created_at":"2026-05-18T12:33:07.085635+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/ZM263OOSTJUXGQXAVPZYVTLVMQ","json":"https://pith.science/pith/ZM263OOSTJUXGQXAVPZYVTLVMQ.json","graph_json":"https://pith.science/api/pith-number/ZM263OOSTJUXGQXAVPZYVTLVMQ/graph.json","events_json":"https://pith.science/api/pith-number/ZM263OOSTJUXGQXAVPZYVTLVMQ/events.json","paper":"https://pith.science/paper/ZM263OOS"},"agent_actions":{"view_html":"https://pith.science/pith/ZM263OOSTJUXGQXAVPZYVTLVMQ","download_json":"https://pith.science/pith/ZM263OOSTJUXGQXAVPZYVTLVMQ.json","view_paper":"https://pith.science/paper/ZM263OOS","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1806.00900&json=true","fetch_graph":"https://pith.science/api/pith-number/ZM263OOSTJUXGQXAVPZYVTLVMQ/graph.json","fetch_events":"https://pith.science/api/pith-number/ZM263OOSTJUXGQXAVPZYVTLVMQ/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/ZM263OOSTJUXGQXAVPZYVTLVMQ/action/timestamp_anchor","attest_storage":"https://pith.science/pith/ZM263OOSTJUXGQXAVPZYVTLVMQ/action/storage_attestation","attest_author":"https://pith.science/pith/ZM263OOSTJUXGQXAVPZYVTLVMQ/action/author_attestation","sign_citation":"https://pith.science/pith/ZM263OOSTJUXGQXAVPZYVTLVMQ/action/citation_signature","submit_replication":"https://pith.science/pith/ZM263OOSTJUXGQXAVPZYVTLVMQ/action/replication_record"}},"created_at":"2026-05-18T00:01:53.177924+00:00","updated_at":"2026-05-18T00:01:53.177924+00:00"}