{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2017:Z5ADUDMKH2F656UVYHQUAOLJRB","short_pith_number":"pith:Z5ADUDMK","schema_version":"1.0","canonical_sha256":"cf403a0d8a3e8beefa95c1e1403969887052bc8ebb66ecc6cf15c9a9401cbcdc","source":{"kind":"arxiv","id":"1705.03122","version":3},"attestation_state":"computed","paper":{"title":"Convolutional Sequence to Sequence Learning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"David Grangier, Denis Yarats, Jonas Gehring, Michael Auli, Yann N. Dauphin","submitted_at":"2017-05-08T23:25:30Z","abstract_excerpt":"The prevalent approach to sequence to sequence learning maps an input sequence to a variable length output sequence via recurrent neural networks. We introduce an architecture based entirely on convolutional neural networks. Compared to recurrent models, computations over all elements can be fully parallelized during training and optimization is easier since the number of non-linearities is fixed and independent of the input length. Our use of gated linear units eases gradient propagation and we equip each decoder layer with a separate attention module. We outperform the accuracy of the deep L"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1705.03122","kind":"arxiv","version":3},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2017-05-08T23:25:30Z","cross_cats_sorted":[],"title_canon_sha256":"c6413277a1ae0a78a0d0a5960ea65a5e37bd5a0769920021a8a82ab00a5ad7c9","abstract_canon_sha256":"d599f75790023dc226c27acfd63274aec977367046738e65b644ddc41b27f8fa"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:39:35.176417Z","signature_b64":"plu4+IsjVTbStat3pCIL33Wj0PHs8+Icz7/aJvAvG3fNhNz1trojKHKW1mzZ+4FJd+hn5AUa+bIb2++EwUFtDg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"cf403a0d8a3e8beefa95c1e1403969887052bc8ebb66ecc6cf15c9a9401cbcdc","last_reissued_at":"2026-05-18T00:39:35.175924Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:39:35.175924Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Convolutional Sequence to Sequence Learning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"David Grangier, Denis Yarats, Jonas Gehring, Michael Auli, Yann N. Dauphin","submitted_at":"2017-05-08T23:25:30Z","abstract_excerpt":"The prevalent approach to sequence to sequence learning maps an input sequence to a variable length output sequence via recurrent neural networks. We introduce an architecture based entirely on convolutional neural networks. Compared to recurrent models, computations over all elements can be fully parallelized during training and optimization is easier since the number of non-linearities is fixed and independent of the input length. Our use of gated linear units eases gradient propagation and we equip each decoder layer with a separate attention module. We outperform the accuracy of the deep L"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1705.03122","kind":"arxiv","version":3},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1705.03122","created_at":"2026-05-18T00:39:35.176004+00:00"},{"alias_kind":"arxiv_version","alias_value":"1705.03122v3","created_at":"2026-05-18T00:39:35.176004+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1705.03122","created_at":"2026-05-18T00:39:35.176004+00:00"},{"alias_kind":"pith_short_12","alias_value":"Z5ADUDMKH2F6","created_at":"2026-05-18T12:31:59.375834+00:00"},{"alias_kind":"pith_short_16","alias_value":"Z5ADUDMKH2F656UV","created_at":"2026-05-18T12:31:59.375834+00:00"},{"alias_kind":"pith_short_8","alias_value":"Z5ADUDMK","created_at":"2026-05-18T12:31:59.375834+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":13,"internal_anchor_count":7,"sample":[{"citing_arxiv_id":"1906.08996","citing_title":"Incremental Adaptation of NMT for Professional Post-editors: A User Study","ref_index":16,"is_internal_anchor":true},{"citing_arxiv_id":"1906.09084","citing_title":"Joint Detection of Malicious Domains and Infected Clients","ref_index":21,"is_internal_anchor":true},{"citing_arxiv_id":"1906.12158","citing_title":"Open-Ended Long-Form Video Question Answering via Hierarchical Convolutional Self-Attention Networks","ref_index":7,"is_internal_anchor":true},{"citing_arxiv_id":"1907.05321","citing_title":"Time2Vec: Learning a Vector Representation of Time","ref_index":17,"is_internal_anchor":true},{"citing_arxiv_id":"1907.09207","citing_title":"Deep Learning for Time Series Forecasting: The Electric Load Case","ref_index":86,"is_internal_anchor":true},{"citing_arxiv_id":"2003.00295","citing_title":"Adaptive Federated Optimization","ref_index":189,"is_internal_anchor":true},{"citing_arxiv_id":"2512.06938","citing_title":"Progress Ratio Embeddings: An Impatience Signal for Robust Length Control in Neural Text Generation","ref_index":8,"is_internal_anchor":true},{"citing_arxiv_id":"1807.03819","citing_title":"Universal Transformers","ref_index":9,"is_internal_anchor":false},{"citing_arxiv_id":"2309.00071","citing_title":"YaRN: Efficient Context Window Extension of Large Language Models","ref_index":6,"is_internal_anchor":false},{"citing_arxiv_id":"2604.09922","citing_title":"K-STEMIT: Knowledge-Informed Spatio-Temporal Efficient Multi-Branch Graph Neural Network for Subsurface Stratigraphy Thickness Estimation from Radar Data","ref_index":11,"is_internal_anchor":false},{"citing_arxiv_id":"1904.10509","citing_title":"Generating Long Sequences with Sparse Transformers","ref_index":7,"is_internal_anchor":false},{"citing_arxiv_id":"2604.21184","citing_title":"Predicting the thermodynamics in the chromosphere from the translation of SDO data into the IRIS$^{2}$ inversion results using a visual transformer model","ref_index":26,"is_internal_anchor":false},{"citing_arxiv_id":"1706.03762","citing_title":"Attention Is All You Need","ref_index":9,"is_internal_anchor":false}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/Z5ADUDMKH2F656UVYHQUAOLJRB","json":"https://pith.science/pith/Z5ADUDMKH2F656UVYHQUAOLJRB.json","graph_json":"https://pith.science/api/pith-number/Z5ADUDMKH2F656UVYHQUAOLJRB/graph.json","events_json":"https://pith.science/api/pith-number/Z5ADUDMKH2F656UVYHQUAOLJRB/events.json","paper":"https://pith.science/paper/Z5ADUDMK"},"agent_actions":{"view_html":"https://pith.science/pith/Z5ADUDMKH2F656UVYHQUAOLJRB","download_json":"https://pith.science/pith/Z5ADUDMKH2F656UVYHQUAOLJRB.json","view_paper":"https://pith.science/paper/Z5ADUDMK","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1705.03122&json=true","fetch_graph":"https://pith.science/api/pith-number/Z5ADUDMKH2F656UVYHQUAOLJRB/graph.json","fetch_events":"https://pith.science/api/pith-number/Z5ADUDMKH2F656UVYHQUAOLJRB/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/Z5ADUDMKH2F656UVYHQUAOLJRB/action/timestamp_anchor","attest_storage":"https://pith.science/pith/Z5ADUDMKH2F656UVYHQUAOLJRB/action/storage_attestation","attest_author":"https://pith.science/pith/Z5ADUDMKH2F656UVYHQUAOLJRB/action/author_attestation","sign_citation":"https://pith.science/pith/Z5ADUDMKH2F656UVYHQUAOLJRB/action/citation_signature","submit_replication":"https://pith.science/pith/Z5ADUDMKH2F656UVYHQUAOLJRB/action/replication_record"}},"created_at":"2026-05-18T00:39:35.176004+00:00","updated_at":"2026-05-18T00:39:35.176004+00:00"}