{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2017:NT2NHCCANLJ6ZXLPSXHLIBBRMS","short_pith_number":"pith:NT2NHCCA","schema_version":"1.0","canonical_sha256":"6cf4d388406ad3ecdd6f95ceb4043164bf6f59f1986a675dcbda410461fa1b4e","source":{"kind":"arxiv","id":"1711.02173","version":2},"attestation_state":"computed","paper":{"title":"Synthetic and Natural Noise Both Break Neural Machine Translation","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.LG"],"primary_cat":"cs.CL","authors_text":"Yonatan Belinkov, Yonatan Bisk","submitted_at":"2017-11-06T20:59:58Z","abstract_excerpt":"Character-based neural machine translation (NMT) models alleviate out-of-vocabulary issues, learn morphology, and move us closer to completely end-to-end translation systems. Unfortunately, they are also very brittle and easily falter when presented with noisy data. In this paper, we confront NMT models with synthetic and natural sources of noise. We find that state-of-the-art models fail to translate even moderately noisy texts that humans have no trouble comprehending. We explore two approaches to increase model robustness: structure-invariant word representations and robust training on nois"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1711.02173","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2017-11-06T20:59:58Z","cross_cats_sorted":["cs.LG"],"title_canon_sha256":"239bcd41ba4e455d118b5e7fb8e6fcee1e0b6ab5cd8c8dd11604799b59b5d353","abstract_canon_sha256":"b62fcbac008242056782c0e7820393e9625a3ac6aff6eec03587f921bcaca3ef"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:22:39.251847Z","signature_b64":"pU1ZbCkpe6ffKBTYqiuqySVXrKSzTZtOq0O2cnPrTxdiBSckdpiI6+lSNvD9DnNw27tq9u3RfhZ/vgG/3ujcDA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"6cf4d388406ad3ecdd6f95ceb4043164bf6f59f1986a675dcbda410461fa1b4e","last_reissued_at":"2026-05-18T00:22:39.251505Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:22:39.251505Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Synthetic and Natural Noise Both Break Neural Machine Translation","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.LG"],"primary_cat":"cs.CL","authors_text":"Yonatan Belinkov, Yonatan Bisk","submitted_at":"2017-11-06T20:59:58Z","abstract_excerpt":"Character-based neural machine translation (NMT) models alleviate out-of-vocabulary issues, learn morphology, and move us closer to completely end-to-end translation systems. Unfortunately, they are also very brittle and easily falter when presented with noisy data. In this paper, we confront NMT models with synthetic and natural sources of noise. We find that state-of-the-art models fail to translate even moderately noisy texts that humans have no trouble comprehending. We explore two approaches to increase model robustness: structure-invariant word representations and robust training on nois"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1711.02173","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1711.02173","created_at":"2026-05-18T00:22:39.251560+00:00"},{"alias_kind":"arxiv_version","alias_value":"1711.02173v2","created_at":"2026-05-18T00:22:39.251560+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1711.02173","created_at":"2026-05-18T00:22:39.251560+00:00"},{"alias_kind":"pith_short_12","alias_value":"NT2NHCCANLJ6","created_at":"2026-05-18T12:31:34.259226+00:00"},{"alias_kind":"pith_short_16","alias_value":"NT2NHCCANLJ6ZXLP","created_at":"2026-05-18T12:31:34.259226+00:00"},{"alias_kind":"pith_short_8","alias_value":"NT2NHCCA","created_at":"2026-05-18T12:31:34.259226+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":1,"internal_anchor_count":1,"sample":[{"citing_arxiv_id":"1906.08393","citing_title":"Robust Machine Translation with Domain Sensitive Pseudo-Sources: Baidu-OSU WMT19 MT Robustness Shared Task System Report","ref_index":4,"is_internal_anchor":true}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/NT2NHCCANLJ6ZXLPSXHLIBBRMS","json":"https://pith.science/pith/NT2NHCCANLJ6ZXLPSXHLIBBRMS.json","graph_json":"https://pith.science/api/pith-number/NT2NHCCANLJ6ZXLPSXHLIBBRMS/graph.json","events_json":"https://pith.science/api/pith-number/NT2NHCCANLJ6ZXLPSXHLIBBRMS/events.json","paper":"https://pith.science/paper/NT2NHCCA"},"agent_actions":{"view_html":"https://pith.science/pith/NT2NHCCANLJ6ZXLPSXHLIBBRMS","download_json":"https://pith.science/pith/NT2NHCCANLJ6ZXLPSXHLIBBRMS.json","view_paper":"https://pith.science/paper/NT2NHCCA","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1711.02173&json=true","fetch_graph":"https://pith.science/api/pith-number/NT2NHCCANLJ6ZXLPSXHLIBBRMS/graph.json","fetch_events":"https://pith.science/api/pith-number/NT2NHCCANLJ6ZXLPSXHLIBBRMS/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/NT2NHCCANLJ6ZXLPSXHLIBBRMS/action/timestamp_anchor","attest_storage":"https://pith.science/pith/NT2NHCCANLJ6ZXLPSXHLIBBRMS/action/storage_attestation","attest_author":"https://pith.science/pith/NT2NHCCANLJ6ZXLPSXHLIBBRMS/action/author_attestation","sign_citation":"https://pith.science/pith/NT2NHCCANLJ6ZXLPSXHLIBBRMS/action/citation_signature","submit_replication":"https://pith.science/pith/NT2NHCCANLJ6ZXLPSXHLIBBRMS/action/replication_record"}},"created_at":"2026-05-18T00:22:39.251560+00:00","updated_at":"2026-05-18T00:22:39.251560+00:00"}