{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2016:CEXEJTDFHL4JDAZ6LPEYNLD4NS","short_pith_number":"pith:CEXEJTDF","schema_version":"1.0","canonical_sha256":"112e44cc653af891833e5bc986ac7c6cb0f71acee085e0eae886b697a99f05af","source":{"kind":"arxiv","id":"1602.07868","version":3},"attestation_state":"computed","paper":{"title":"Weight Normalization: A Simple Reparameterization to Accelerate Training of Deep Neural Networks","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","cs.NE"],"primary_cat":"cs.LG","authors_text":"Diederik P. Kingma, Tim Salimans","submitted_at":"2016-02-25T10:13:45Z","abstract_excerpt":"We present weight normalization: a reparameterization of the weight vectors in a neural network that decouples the length of those weight vectors from their direction. By reparameterizing the weights in this way we improve the conditioning of the optimization problem and we speed up convergence of stochastic gradient descent. Our reparameterization is inspired by batch normalization but does not introduce any dependencies between the examples in a minibatch. This means that our method can also be applied successfully to recurrent models such as LSTMs and to noise-sensitive applications such as"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1602.07868","kind":"arxiv","version":3},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2016-02-25T10:13:45Z","cross_cats_sorted":["cs.AI","cs.NE"],"title_canon_sha256":"838ac1d78d3b8e5f48af52a50ba314fa3c70aa7690bdb4b0a8b58b040b8cb9b6","abstract_canon_sha256":"7377e2919f1ff9875ffabd3104b6ea7d9cc5bce28a77d9f6dce11a156338ee22"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T01:12:54.924706Z","signature_b64":"qQi5wHlkOv/iCsxxURefVdzPj8q56r2RwHzTsnR4VXIAqm4jPJXj6j9MChvRzyhtk/CGEZ4hB65n43eFk/GTCg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"112e44cc653af891833e5bc986ac7c6cb0f71acee085e0eae886b697a99f05af","last_reissued_at":"2026-05-18T01:12:54.924357Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T01:12:54.924357Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Weight Normalization: A Simple Reparameterization to Accelerate Training of Deep Neural Networks","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","cs.NE"],"primary_cat":"cs.LG","authors_text":"Diederik P. Kingma, Tim Salimans","submitted_at":"2016-02-25T10:13:45Z","abstract_excerpt":"We present weight normalization: a reparameterization of the weight vectors in a neural network that decouples the length of those weight vectors from their direction. By reparameterizing the weights in this way we improve the conditioning of the optimization problem and we speed up convergence of stochastic gradient descent. Our reparameterization is inspired by batch normalization but does not introduce any dependencies between the examples in a minibatch. This means that our method can also be applied successfully to recurrent models such as LSTMs and to noise-sensitive applications such as"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1602.07868","kind":"arxiv","version":3},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1602.07868","created_at":"2026-05-18T01:12:54.924416+00:00"},{"alias_kind":"arxiv_version","alias_value":"1602.07868v3","created_at":"2026-05-18T01:12:54.924416+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1602.07868","created_at":"2026-05-18T01:12:54.924416+00:00"},{"alias_kind":"pith_short_12","alias_value":"CEXEJTDFHL4J","created_at":"2026-05-18T12:30:09.641336+00:00"},{"alias_kind":"pith_short_16","alias_value":"CEXEJTDFHL4JDAZ6","created_at":"2026-05-18T12:30:09.641336+00:00"},{"alias_kind":"pith_short_8","alias_value":"CEXEJTDF","created_at":"2026-05-18T12:30:09.641336+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":7,"internal_anchor_count":3,"sample":[{"citing_arxiv_id":"2502.02345","citing_title":"Low Rank Based Subspace Inference for the Laplace Approximation of Bayesian Neural Networks","ref_index":22,"is_internal_anchor":true},{"citing_arxiv_id":"2106.10689","citing_title":"NeuS: Learning Neural Implicit Surfaces by Volume Rendering for Multi-view Reconstruction","ref_index":36,"is_internal_anchor":true},{"citing_arxiv_id":"2602.04737","citing_title":"Rationality Measurement and Theory for Reinforcement Learning Agents","ref_index":13,"is_internal_anchor":true},{"citing_arxiv_id":"2104.13478","citing_title":"Geometric Deep Learning: Grids, Groups, Graphs, Geodesics, and Gauges","ref_index":72,"is_internal_anchor":false},{"citing_arxiv_id":"1710.10196","citing_title":"Progressive Growing of GANs for Improved Quality, Stability, and Variation","ref_index":42,"is_internal_anchor":false},{"citing_arxiv_id":"1605.08803","citing_title":"Density estimation using Real NVP","ref_index":54,"is_internal_anchor":false},{"citing_arxiv_id":"2605.04418","citing_title":"Demystifying Manifold Constraints in LLM Pre-training","ref_index":27,"is_internal_anchor":false}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/CEXEJTDFHL4JDAZ6LPEYNLD4NS","json":"https://pith.science/pith/CEXEJTDFHL4JDAZ6LPEYNLD4NS.json","graph_json":"https://pith.science/api/pith-number/CEXEJTDFHL4JDAZ6LPEYNLD4NS/graph.json","events_json":"https://pith.science/api/pith-number/CEXEJTDFHL4JDAZ6LPEYNLD4NS/events.json","paper":"https://pith.science/paper/CEXEJTDF"},"agent_actions":{"view_html":"https://pith.science/pith/CEXEJTDFHL4JDAZ6LPEYNLD4NS","download_json":"https://pith.science/pith/CEXEJTDFHL4JDAZ6LPEYNLD4NS.json","view_paper":"https://pith.science/paper/CEXEJTDF","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1602.07868&json=true","fetch_graph":"https://pith.science/api/pith-number/CEXEJTDFHL4JDAZ6LPEYNLD4NS/graph.json","fetch_events":"https://pith.science/api/pith-number/CEXEJTDFHL4JDAZ6LPEYNLD4NS/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/CEXEJTDFHL4JDAZ6LPEYNLD4NS/action/timestamp_anchor","attest_storage":"https://pith.science/pith/CEXEJTDFHL4JDAZ6LPEYNLD4NS/action/storage_attestation","attest_author":"https://pith.science/pith/CEXEJTDFHL4JDAZ6LPEYNLD4NS/action/author_attestation","sign_citation":"https://pith.science/pith/CEXEJTDFHL4JDAZ6LPEYNLD4NS/action/citation_signature","submit_replication":"https://pith.science/pith/CEXEJTDFHL4JDAZ6LPEYNLD4NS/action/replication_record"}},"created_at":"2026-05-18T01:12:54.924416+00:00","updated_at":"2026-05-18T01:12:54.924416+00:00"}