{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2015:SRDHVPH3TAWTOGWRH2RJ6MWQZX","short_pith_number":"pith:SRDHVPH3","schema_version":"1.0","canonical_sha256":"94467abcfb982d371ad13ea29f32d0cdcb1d99b9c137fddd41ac2210727eee24","source":{"kind":"arxiv","id":"1505.00387","version":2},"attestation_state":"computed","paper":{"title":"Highway Networks","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.NE"],"primary_cat":"cs.LG","authors_text":"J\\\"urgen Schmidhuber, Klaus Greff, Rupesh Kumar Srivastava","submitted_at":"2015-05-03T01:56:57Z","abstract_excerpt":"There is plenty of theoretical and empirical evidence that depth of neural networks is a crucial ingredient for their success. However, network training becomes more difficult with increasing depth and training of very deep networks remains an open problem. In this extended abstract, we introduce a new architecture designed to ease gradient-based training of very deep networks. We refer to networks with this architecture as highway networks, since they allow unimpeded information flow across several layers on \"information highways\". The architecture is characterized by the use of gating units "},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1505.00387","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2015-05-03T01:56:57Z","cross_cats_sorted":["cs.NE"],"title_canon_sha256":"3b1d0845e28ac3080ac0fd767dabd650d441c606054609f5fd62a7a10672b55f","abstract_canon_sha256":"2ee343aa8c5a0428d08199ba4566e69a3c0431c2561fe7d2e0b09a4fa2e12daf"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T01:28:05.167745Z","signature_b64":"MyODWG5Oc2DBe5CBHcGDQQEYGY1wtJNxaa/N5SQNBCjQx8UnTXgOMYfu2LYLNQnc70xLzjbel0UROHBIl0xWAg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"94467abcfb982d371ad13ea29f32d0cdcb1d99b9c137fddd41ac2210727eee24","last_reissued_at":"2026-05-18T01:28:05.167158Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T01:28:05.167158Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Highway Networks","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.NE"],"primary_cat":"cs.LG","authors_text":"J\\\"urgen Schmidhuber, Klaus Greff, Rupesh Kumar Srivastava","submitted_at":"2015-05-03T01:56:57Z","abstract_excerpt":"There is plenty of theoretical and empirical evidence that depth of neural networks is a crucial ingredient for their success. However, network training becomes more difficult with increasing depth and training of very deep networks remains an open problem. In this extended abstract, we introduce a new architecture designed to ease gradient-based training of very deep networks. We refer to networks with this architecture as highway networks, since they allow unimpeded information flow across several layers on \"information highways\". The architecture is characterized by the use of gating units "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1505.00387","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1505.00387","created_at":"2026-05-18T01:28:05.167247+00:00"},{"alias_kind":"arxiv_version","alias_value":"1505.00387v2","created_at":"2026-05-18T01:28:05.167247+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1505.00387","created_at":"2026-05-18T01:28:05.167247+00:00"},{"alias_kind":"pith_short_12","alias_value":"SRDHVPH3TAWT","created_at":"2026-05-18T12:29:42.218222+00:00"},{"alias_kind":"pith_short_16","alias_value":"SRDHVPH3TAWTOGWR","created_at":"2026-05-18T12:29:42.218222+00:00"},{"alias_kind":"pith_short_8","alias_value":"SRDHVPH3","created_at":"2026-05-18T12:29:42.218222+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":30,"internal_anchor_count":17,"sample":[{"citing_arxiv_id":"1906.12284","citing_title":"Widening the Representation Bottleneck in Neural Machine Translation with Lexical Shortcuts","ref_index":28,"is_internal_anchor":true},{"citing_arxiv_id":"1907.01686","citing_title":"Machine Reading Comprehension: a Literature Review","ref_index":49,"is_internal_anchor":true},{"citing_arxiv_id":"1907.01939","citing_title":"Neural Network Architecture Search with Differentiable Cartesian Genetic Programming for Regression","ref_index":31,"is_internal_anchor":true},{"citing_arxiv_id":"2605.23259","citing_title":"Multi-Gate Residuals","ref_index":4,"is_internal_anchor":true},{"citing_arxiv_id":"1907.02871","citing_title":"Genetic Network Architecture Search","ref_index":26,"is_internal_anchor":true},{"citing_arxiv_id":"1907.04197","citing_title":"Attending to Emotional Narratives","ref_index":36,"is_internal_anchor":true},{"citing_arxiv_id":"1907.07255","citing_title":"Iterative temporal differencing with random synaptic feedback weights support error backpropagation for deep learning","ref_index":8,"is_internal_anchor":true},{"citing_arxiv_id":"1907.11519","citing_title":"Context-Aware Multipath Networks","ref_index":33,"is_internal_anchor":true},{"citing_arxiv_id":"2602.08064","citing_title":"SiameseNorm: Breaking the Barrier to Reconciling Pre/Post-Norm","ref_index":20,"is_internal_anchor":true},{"citing_arxiv_id":"2507.21334","citing_title":"Graph neural networks for residential location choice: connection to classical logit models","ref_index":44,"is_internal_anchor":true},{"citing_arxiv_id":"2603.15031","citing_title":"Attention Residuals","ref_index":46,"is_internal_anchor":true},{"citing_arxiv_id":"2605.20708","citing_title":"Rethinking Cross-Layer Information Routing in Diffusion Transformers","ref_index":51,"is_internal_anchor":true},{"citing_arxiv_id":"2605.18797","citing_title":"Simply Stabilizing the Loop via Fully Looped Transformer","ref_index":25,"is_internal_anchor":true},{"citing_arxiv_id":"2601.00417","citing_title":"Deep Delta Learning","ref_index":12,"is_internal_anchor":true},{"citing_arxiv_id":"2603.05117","citing_title":"SeedPolicy: Horizon Scaling via Self-Evolving Diffusion Policy for Robot Manipulation","ref_index":31,"is_internal_anchor":true},{"citing_arxiv_id":"2604.03297","citing_title":"XAttnRes: Cross-Stage Attention Residuals for Medical Image Segmentation","ref_index":28,"is_internal_anchor":true},{"citing_arxiv_id":"2605.13842","citing_title":"From DES to KiDS: Domain adaptation for cross-survey detection of low-surface-brightness galaxies","ref_index":268,"is_internal_anchor":true},{"citing_arxiv_id":"2605.11989","citing_title":"A Transfer Learning Evaluation of Deep Neural Networks for Image Classification","ref_index":27,"is_internal_anchor":false},{"citing_arxiv_id":"2104.13478","citing_title":"Geometric Deep Learning: Grids, Groups, Graphs, Geodesics, and Gauges","ref_index":85,"is_internal_anchor":false},{"citing_arxiv_id":"1605.07146","citing_title":"Wide Residual Networks","ref_index":28,"is_internal_anchor":false},{"citing_arxiv_id":"1609.08144","citing_title":"Google's Neural Machine Translation System: Bridging the Gap between Human and Machine Translation","ref_index":40,"is_internal_anchor":false},{"citing_arxiv_id":"2505.06708","citing_title":"Gated Attention for Large Language Models: Non-linearity, Sparsity, and Attention-Sink-Free","ref_index":24,"is_internal_anchor":false},{"citing_arxiv_id":"2605.10298","citing_title":"Set Prediction for Next-Day Active Fire Forecasting","ref_index":30,"is_internal_anchor":false},{"citing_arxiv_id":"1710.05941","citing_title":"Searching for Activation Functions","ref_index":17,"is_internal_anchor":false},{"citing_arxiv_id":"2605.03953","citing_title":"Transformers with Selective Access to Early Representations","ref_index":16,"is_internal_anchor":false}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/SRDHVPH3TAWTOGWRH2RJ6MWQZX","json":"https://pith.science/pith/SRDHVPH3TAWTOGWRH2RJ6MWQZX.json","graph_json":"https://pith.science/api/pith-number/SRDHVPH3TAWTOGWRH2RJ6MWQZX/graph.json","events_json":"https://pith.science/api/pith-number/SRDHVPH3TAWTOGWRH2RJ6MWQZX/events.json","paper":"https://pith.science/paper/SRDHVPH3"},"agent_actions":{"view_html":"https://pith.science/pith/SRDHVPH3TAWTOGWRH2RJ6MWQZX","download_json":"https://pith.science/pith/SRDHVPH3TAWTOGWRH2RJ6MWQZX.json","view_paper":"https://pith.science/paper/SRDHVPH3","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1505.00387&json=true","fetch_graph":"https://pith.science/api/pith-number/SRDHVPH3TAWTOGWRH2RJ6MWQZX/graph.json","fetch_events":"https://pith.science/api/pith-number/SRDHVPH3TAWTOGWRH2RJ6MWQZX/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/SRDHVPH3TAWTOGWRH2RJ6MWQZX/action/timestamp_anchor","attest_storage":"https://pith.science/pith/SRDHVPH3TAWTOGWRH2RJ6MWQZX/action/storage_attestation","attest_author":"https://pith.science/pith/SRDHVPH3TAWTOGWRH2RJ6MWQZX/action/author_attestation","sign_citation":"https://pith.science/pith/SRDHVPH3TAWTOGWRH2RJ6MWQZX/action/citation_signature","submit_replication":"https://pith.science/pith/SRDHVPH3TAWTOGWRH2RJ6MWQZX/action/replication_record"}},"created_at":"2026-05-18T01:28:05.167247+00:00","updated_at":"2026-05-18T01:28:05.167247+00:00"}