{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2015:NWSSFOBUGHPVMBMWBPEAZ4UTG6","short_pith_number":"pith:NWSSFOBU","schema_version":"1.0","canonical_sha256":"6da522b83431df5605960bc80cf29337b09bfc926098390bfbcdcebc54c474a8","source":{"kind":"arxiv","id":"1508.04025","version":5},"attestation_state":"computed","paper":{"title":"Effective Approaches to Attention-based Neural Machine Translation","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Christopher D. Manning, Hieu Pham, Minh-Thang Luong","submitted_at":"2015-08-17T13:43:19Z","abstract_excerpt":"An attentional mechanism has lately been used to improve neural machine translation (NMT) by selectively focusing on parts of the source sentence during translation. However, there has been little work exploring useful architectures for attention-based NMT. This paper examines two simple and effective classes of attentional mechanism: a global approach which always attends to all source words and a local one that only looks at a subset of source words at a time. We demonstrate the effectiveness of both approaches over the WMT translation tasks between English and German in both directions. Wit"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1508.04025","kind":"arxiv","version":5},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2015-08-17T13:43:19Z","cross_cats_sorted":[],"title_canon_sha256":"89e232aecfb6e959ecd5fdeb5cfed4ed2c780f35ae31fa8b7c71b0f40826c4a2","abstract_canon_sha256":"034dad4d5cb7f93939763852d10005e859782dedca79707ce74ec01bb51da55e"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T01:32:36.793954Z","signature_b64":"gdO1M9btKUzQ0VCHjoJudL9nmLR4ePkjeac1Jklgq6Rbar4Yf1xbfdmalYWUZ7Md+ror4zMB+KbeuPKh4Wy6CA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"6da522b83431df5605960bc80cf29337b09bfc926098390bfbcdcebc54c474a8","last_reissued_at":"2026-05-18T01:32:36.793455Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T01:32:36.793455Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Effective Approaches to Attention-based Neural Machine Translation","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Christopher D. Manning, Hieu Pham, Minh-Thang Luong","submitted_at":"2015-08-17T13:43:19Z","abstract_excerpt":"An attentional mechanism has lately been used to improve neural machine translation (NMT) by selectively focusing on parts of the source sentence during translation. However, there has been little work exploring useful architectures for attention-based NMT. This paper examines two simple and effective classes of attentional mechanism: a global approach which always attends to all source words and a local one that only looks at a subset of source words at a time. We demonstrate the effectiveness of both approaches over the WMT translation tasks between English and German in both directions. Wit"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1508.04025","kind":"arxiv","version":5},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1508.04025","created_at":"2026-05-18T01:32:36.793548+00:00"},{"alias_kind":"arxiv_version","alias_value":"1508.04025v5","created_at":"2026-05-18T01:32:36.793548+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1508.04025","created_at":"2026-05-18T01:32:36.793548+00:00"},{"alias_kind":"pith_short_12","alias_value":"NWSSFOBUGHPV","created_at":"2026-05-18T12:29:34.919912+00:00"},{"alias_kind":"pith_short_16","alias_value":"NWSSFOBUGHPVMBMW","created_at":"2026-05-18T12:29:34.919912+00:00"},{"alias_kind":"pith_short_8","alias_value":"NWSSFOBU","created_at":"2026-05-18T12:29:34.919912+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":20,"internal_anchor_count":13,"sample":[{"citing_arxiv_id":"1906.08089","citing_title":"Predicting Drug Responses by Propagating Interactions through Text-Enhanced Drug-Gene Networks","ref_index":10,"is_internal_anchor":true},{"citing_arxiv_id":"1906.08584","citing_title":"Improving Zero-shot Translation with Language-Independent Constraints","ref_index":22,"is_internal_anchor":true},{"citing_arxiv_id":"1906.10907","citing_title":"Leveraging Text Repetitions and Denoising Autoencoders in OCR Post-correction","ref_index":10,"is_internal_anchor":true},{"citing_arxiv_id":"1906.10910","citing_title":"Creating A Neural Pedagogical Agent by Jointly Learning to Review and Assess","ref_index":28,"is_internal_anchor":true},{"citing_arxiv_id":"1907.00570","citing_title":"Do Transformer Attention Heads Provide Transparency in Abstractive Summarization?","ref_index":12,"is_internal_anchor":true},{"citing_arxiv_id":"1907.02226","citing_title":"Graph-based Knowledge Distillation by Multi-head Attention Network","ref_index":18,"is_internal_anchor":true},{"citing_arxiv_id":"1907.06205","citing_title":"Automatic Repair and Type Binding of Undeclared Variables using Neural Networks","ref_index":6,"is_internal_anchor":true},{"citing_arxiv_id":"1907.07769","citing_title":"Hierarchical Sequence to Sequence Voice Conversion with Limited Data","ref_index":54,"is_internal_anchor":true},{"citing_arxiv_id":"1907.07449","citing_title":"OGNet: Salient Object Detection with Output-guided Attention Module","ref_index":44,"is_internal_anchor":true},{"citing_arxiv_id":"1907.11512","citing_title":"Investigating Self-Attention Network for Chinese Word Segmentation","ref_index":5,"is_internal_anchor":true},{"citing_arxiv_id":"1907.11769","citing_title":"Automatically Learning Construction Injury Precursors from Text","ref_index":15,"is_internal_anchor":true},{"citing_arxiv_id":"2502.12370","citing_title":"Positional Encoding in Transformer-Based Time Series Models: A Survey","ref_index":33,"is_internal_anchor":true},{"citing_arxiv_id":"2605.20635","citing_title":"The General Theory of Localization Methods","ref_index":69,"is_internal_anchor":true},{"citing_arxiv_id":"2604.02451","citing_title":"Skeleton-based Coherence Modeling in Narratives","ref_index":9,"is_internal_anchor":false},{"citing_arxiv_id":"1804.03999","citing_title":"Attention U-Net: Learning Where to Look for the Pancreas","ref_index":19,"is_internal_anchor":false},{"citing_arxiv_id":"2605.06578","citing_title":"Resource-Efficient CSI Prediction: A Gated Fusion and Factorized Projection Approach","ref_index":14,"is_internal_anchor":false},{"citing_arxiv_id":"2604.22374","citing_title":"Selective Contrastive Learning For Gloss Free Sign Language Translation","ref_index":10,"is_internal_anchor":false},{"citing_arxiv_id":"2604.21088","citing_title":"Jet Quenching Identification via Supervised Learning in Simulated Heavy-Ion Collisions","ref_index":56,"is_internal_anchor":false},{"citing_arxiv_id":"2408.00118","citing_title":"Gemma 2: Improving Open Language Models at a Practical Size","ref_index":138,"is_internal_anchor":false},{"citing_arxiv_id":"1706.03762","citing_title":"Attention Is All You Need","ref_index":24,"is_internal_anchor":false}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/NWSSFOBUGHPVMBMWBPEAZ4UTG6","json":"https://pith.science/pith/NWSSFOBUGHPVMBMWBPEAZ4UTG6.json","graph_json":"https://pith.science/api/pith-number/NWSSFOBUGHPVMBMWBPEAZ4UTG6/graph.json","events_json":"https://pith.science/api/pith-number/NWSSFOBUGHPVMBMWBPEAZ4UTG6/events.json","paper":"https://pith.science/paper/NWSSFOBU"},"agent_actions":{"view_html":"https://pith.science/pith/NWSSFOBUGHPVMBMWBPEAZ4UTG6","download_json":"https://pith.science/pith/NWSSFOBUGHPVMBMWBPEAZ4UTG6.json","view_paper":"https://pith.science/paper/NWSSFOBU","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1508.04025&json=true","fetch_graph":"https://pith.science/api/pith-number/NWSSFOBUGHPVMBMWBPEAZ4UTG6/graph.json","fetch_events":"https://pith.science/api/pith-number/NWSSFOBUGHPVMBMWBPEAZ4UTG6/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/NWSSFOBUGHPVMBMWBPEAZ4UTG6/action/timestamp_anchor","attest_storage":"https://pith.science/pith/NWSSFOBUGHPVMBMWBPEAZ4UTG6/action/storage_attestation","attest_author":"https://pith.science/pith/NWSSFOBUGHPVMBMWBPEAZ4UTG6/action/author_attestation","sign_citation":"https://pith.science/pith/NWSSFOBUGHPVMBMWBPEAZ4UTG6/action/citation_signature","submit_replication":"https://pith.science/pith/NWSSFOBUGHPVMBMWBPEAZ4UTG6/action/replication_record"}},"created_at":"2026-05-18T01:32:36.793548+00:00","updated_at":"2026-05-18T01:32:36.793548+00:00"}