{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2015:US74M5CD5LK4NHKPIAPEGUWYWU","short_pith_number":"pith:US74M5CD","schema_version":"1.0","canonical_sha256":"a4bfc67443ead5c69d4f401e4352d8b5140ab677c39f3e3ca533c1ec3d0d314d","source":{"kind":"arxiv","id":"1511.06422","version":7},"attestation_state":"computed","paper":{"title":"All you need is a good init","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.LG","authors_text":"Dmytro Mishkin, Jiri Matas","submitted_at":"2015-11-19T22:19:15Z","abstract_excerpt":"Layer-sequential unit-variance (LSUV) initialization - a simple method for weight initialization for deep net learning - is proposed. The method consists of the two steps. First, pre-initialize weights of each convolution or inner-product layer with orthonormal matrices. Second, proceed from the first to the final layer, normalizing the variance of the output of each layer to be equal to one.\n  Experiment with different activation functions (maxout, ReLU-family, tanh) show that the proposed initialization leads to learning of very deep nets that (i) produces networks with test accuracy better "},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1511.06422","kind":"arxiv","version":7},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2015-11-19T22:19:15Z","cross_cats_sorted":[],"title_canon_sha256":"90df91ccfb506b45063fc6fb208289e80e2fb8cc03b6e6cc6503163f4a6c300d","abstract_canon_sha256":"c36d07f73634413e5d27fe1018103d991d86ad075a2ca1cbac5a84b63f39449c"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T01:20:18.841987Z","signature_b64":"2zBp1qVcJfjOV99WIJJyFVMcdo7ez/gMJJGd1cWEYoKuvnHSAOHgBi3z7MTmO3sNFL2HkE0mjbw6G0XcQEJdBA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"a4bfc67443ead5c69d4f401e4352d8b5140ab677c39f3e3ca533c1ec3d0d314d","last_reissued_at":"2026-05-18T01:20:18.841408Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T01:20:18.841408Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"All you need is a good init","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.LG","authors_text":"Dmytro Mishkin, Jiri Matas","submitted_at":"2015-11-19T22:19:15Z","abstract_excerpt":"Layer-sequential unit-variance (LSUV) initialization - a simple method for weight initialization for deep net learning - is proposed. The method consists of the two steps. First, pre-initialize weights of each convolution or inner-product layer with orthonormal matrices. Second, proceed from the first to the final layer, normalizing the variance of the output of each layer to be equal to one.\n  Experiment with different activation functions (maxout, ReLU-family, tanh) show that the proposed initialization leads to learning of very deep nets that (i) produces networks with test accuracy better "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1511.06422","kind":"arxiv","version":7},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1511.06422","created_at":"2026-05-18T01:20:18.841492+00:00"},{"alias_kind":"arxiv_version","alias_value":"1511.06422v7","created_at":"2026-05-18T01:20:18.841492+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1511.06422","created_at":"2026-05-18T01:20:18.841492+00:00"},{"alias_kind":"pith_short_12","alias_value":"US74M5CD5LK4","created_at":"2026-05-18T12:29:44.643036+00:00"},{"alias_kind":"pith_short_16","alias_value":"US74M5CD5LK4NHKP","created_at":"2026-05-18T12:29:44.643036+00:00"},{"alias_kind":"pith_short_8","alias_value":"US74M5CD","created_at":"2026-05-18T12:29:44.643036+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":6,"internal_anchor_count":2,"sample":[{"citing_arxiv_id":"1907.00664","citing_title":"Learning World Graphs to Accelerate Hierarchical Reinforcement Learning","ref_index":64,"is_internal_anchor":true},{"citing_arxiv_id":"2605.15416","citing_title":"Margin-Adaptive Confidence Ranking for Reliable LLM Judgement","ref_index":227,"is_internal_anchor":true},{"citing_arxiv_id":"2605.08171","citing_title":"Communication Dynamics Neural Networks: FFT-Diagonalized Layers for Improved Hessian Conditioning at Reduced Parameter Count","ref_index":28,"is_internal_anchor":false},{"citing_arxiv_id":"2605.08842","citing_title":"XPERT: Expert Knowledge Transfer for Effective Training of Language Models","ref_index":67,"is_internal_anchor":false},{"citing_arxiv_id":"2604.23070","citing_title":"Learning the Weather-Grid Nexus via Weather-to-Voltage (W2V) Predictive Modeling","ref_index":31,"is_internal_anchor":false},{"citing_arxiv_id":"2605.06563","citing_title":"Criticality and Saturation in Orthogonal Neural Networks","ref_index":5,"is_internal_anchor":false}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/US74M5CD5LK4NHKPIAPEGUWYWU","json":"https://pith.science/pith/US74M5CD5LK4NHKPIAPEGUWYWU.json","graph_json":"https://pith.science/api/pith-number/US74M5CD5LK4NHKPIAPEGUWYWU/graph.json","events_json":"https://pith.science/api/pith-number/US74M5CD5LK4NHKPIAPEGUWYWU/events.json","paper":"https://pith.science/paper/US74M5CD"},"agent_actions":{"view_html":"https://pith.science/pith/US74M5CD5LK4NHKPIAPEGUWYWU","download_json":"https://pith.science/pith/US74M5CD5LK4NHKPIAPEGUWYWU.json","view_paper":"https://pith.science/paper/US74M5CD","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1511.06422&json=true","fetch_graph":"https://pith.science/api/pith-number/US74M5CD5LK4NHKPIAPEGUWYWU/graph.json","fetch_events":"https://pith.science/api/pith-number/US74M5CD5LK4NHKPIAPEGUWYWU/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/US74M5CD5LK4NHKPIAPEGUWYWU/action/timestamp_anchor","attest_storage":"https://pith.science/pith/US74M5CD5LK4NHKPIAPEGUWYWU/action/storage_attestation","attest_author":"https://pith.science/pith/US74M5CD5LK4NHKPIAPEGUWYWU/action/author_attestation","sign_citation":"https://pith.science/pith/US74M5CD5LK4NHKPIAPEGUWYWU/action/citation_signature","submit_replication":"https://pith.science/pith/US74M5CD5LK4NHKPIAPEGUWYWU/action/replication_record"}},"created_at":"2026-05-18T01:20:18.841492+00:00","updated_at":"2026-05-18T01:20:18.841492+00:00"}