{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2018:G2O27NVXFMXEWERV5TYLCDN5QF","short_pith_number":"pith:G2O27NVX","schema_version":"1.0","canonical_sha256":"369dafb6b72b2e4b1235ecf0b10dbd81469458a032d1a53385fb3364969057cc","source":{"kind":"arxiv","id":"1809.00388","version":1},"attestation_state":"computed","paper":{"title":"MTNT: A Testbed for Machine Translation of Noisy Text","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Graham Neubig, Paul Michel","submitted_at":"2018-09-02T20:43:09Z","abstract_excerpt":"Noisy or non-standard input text can cause disastrous mistranslations in most modern Machine Translation (MT) systems, and there has been growing research interest in creating noise-robust MT systems. However, as of yet there are no publicly available parallel corpora of with naturally occurring noisy inputs and translations, and thus previous work has resorted to evaluating on synthetically created datasets. In this paper, we propose a benchmark dataset for Machine Translation of Noisy Text (MTNT), consisting of noisy comments on Reddit (www.reddit.com) and professionally sourced translations"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1809.00388","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2018-09-02T20:43:09Z","cross_cats_sorted":[],"title_canon_sha256":"e9b34f2dcf64c3d7743590a81658b09c301937e14faf57dc4f6f7e8d6c91d62a","abstract_canon_sha256":"3aaf161a53042081dfdd19c016db9bb93127cf4dad8fa6a11ef339e836b65795"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:06:35.352310Z","signature_b64":"9/hXz1p4tDN2C0ww0kTQTNLNK/gPwpyorFZ2r26sr0ZkqXSsYLXAmNH2+qqanD63M4Mux8T+cxyako8QuTmUDA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"369dafb6b72b2e4b1235ecf0b10dbd81469458a032d1a53385fb3364969057cc","last_reissued_at":"2026-05-18T00:06:35.351721Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:06:35.351721Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"MTNT: A Testbed for Machine Translation of Noisy Text","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Graham Neubig, Paul Michel","submitted_at":"2018-09-02T20:43:09Z","abstract_excerpt":"Noisy or non-standard input text can cause disastrous mistranslations in most modern Machine Translation (MT) systems, and there has been growing research interest in creating noise-robust MT systems. However, as of yet there are no publicly available parallel corpora of with naturally occurring noisy inputs and translations, and thus previous work has resorted to evaluating on synthetically created datasets. In this paper, we propose a benchmark dataset for Machine Translation of Noisy Text (MTNT), consisting of noisy comments on Reddit (www.reddit.com) and professionally sourced translations"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1809.00388","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1809.00388","created_at":"2026-05-18T00:06:35.351805+00:00"},{"alias_kind":"arxiv_version","alias_value":"1809.00388v1","created_at":"2026-05-18T00:06:35.351805+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1809.00388","created_at":"2026-05-18T00:06:35.351805+00:00"},{"alias_kind":"pith_short_12","alias_value":"G2O27NVXFMXE","created_at":"2026-05-18T12:32:25.280505+00:00"},{"alias_kind":"pith_short_16","alias_value":"G2O27NVXFMXEWERV","created_at":"2026-05-18T12:32:25.280505+00:00"},{"alias_kind":"pith_short_8","alias_value":"G2O27NVX","created_at":"2026-05-18T12:32:25.280505+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":1,"internal_anchor_count":1,"sample":[{"citing_arxiv_id":"1906.08393","citing_title":"Robust Machine Translation with Domain Sensitive Pseudo-Sources: Baidu-OSU WMT19 MT Robustness Shared Task System Report","ref_index":17,"is_internal_anchor":true}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/G2O27NVXFMXEWERV5TYLCDN5QF","json":"https://pith.science/pith/G2O27NVXFMXEWERV5TYLCDN5QF.json","graph_json":"https://pith.science/api/pith-number/G2O27NVXFMXEWERV5TYLCDN5QF/graph.json","events_json":"https://pith.science/api/pith-number/G2O27NVXFMXEWERV5TYLCDN5QF/events.json","paper":"https://pith.science/paper/G2O27NVX"},"agent_actions":{"view_html":"https://pith.science/pith/G2O27NVXFMXEWERV5TYLCDN5QF","download_json":"https://pith.science/pith/G2O27NVXFMXEWERV5TYLCDN5QF.json","view_paper":"https://pith.science/paper/G2O27NVX","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1809.00388&json=true","fetch_graph":"https://pith.science/api/pith-number/G2O27NVXFMXEWERV5TYLCDN5QF/graph.json","fetch_events":"https://pith.science/api/pith-number/G2O27NVXFMXEWERV5TYLCDN5QF/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/G2O27NVXFMXEWERV5TYLCDN5QF/action/timestamp_anchor","attest_storage":"https://pith.science/pith/G2O27NVXFMXEWERV5TYLCDN5QF/action/storage_attestation","attest_author":"https://pith.science/pith/G2O27NVXFMXEWERV5TYLCDN5QF/action/author_attestation","sign_citation":"https://pith.science/pith/G2O27NVXFMXEWERV5TYLCDN5QF/action/citation_signature","submit_replication":"https://pith.science/pith/G2O27NVXFMXEWERV5TYLCDN5QF/action/replication_record"}},"created_at":"2026-05-18T00:06:35.351805+00:00","updated_at":"2026-05-18T00:06:35.351805+00:00"}