{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2019:XOGAMZQ4LO5V74KK5SRPHO2QVP","short_pith_number":"pith:XOGAMZQ4","schema_version":"1.0","canonical_sha256":"bb8c06661c5bbb5ff14aeca2f3bb50abfac1d9f087a85f9c896612dbc895c4bf","source":{"kind":"arxiv","id":"1906.06442","version":1},"attestation_state":"computed","paper":{"title":"Tagged Back-Translation","license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","headline":"","cross_cats":["cs.LG"],"primary_cat":"cs.CL","authors_text":"Ciprian Chelba, David Grangier, Isaac Caswell","submitted_at":"2019-06-15T00:36:41Z","abstract_excerpt":"Recent work in Neural Machine Translation (NMT) has shown significant quality gains from noised-beam decoding during back-translation, a method to generate synthetic parallel data. We show that the main role of such synthetic noise is not to diversify the source side, as previously suggested, but simply to indicate to the model that the given source is synthetic. We propose a simpler alternative to noising techniques, consisting of tagging back-translated source sentences with an extra token. Our results on WMT outperform noised back-translation in English-Romanian and match performance on Eng"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1906.06442","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","primary_cat":"cs.CL","submitted_at":"2019-06-15T00:36:41Z","cross_cats_sorted":["cs.LG"],"title_canon_sha256":"195a03de4b349f44da08131314019c815834e0dcb1074031fbdd4b0f69962d5d","abstract_canon_sha256":"e70a7339bdc56bc3635bd4f4d8788ebe4d5a8809547f00ad46ca7122314928df"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:43:13.669135Z","signature_b64":"IWsIUIPMUKqwcncvjNZEPG6sSxd1ICWImt8YM8+/nU1ylS8wrnaWm+0KVdcP3iwfUKpIRGEDUfF3Q8xHptzeBQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"bb8c06661c5bbb5ff14aeca2f3bb50abfac1d9f087a85f9c896612dbc895c4bf","last_reissued_at":"2026-05-17T23:43:13.668705Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:43:13.668705Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Tagged Back-Translation","license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","headline":"","cross_cats":["cs.LG"],"primary_cat":"cs.CL","authors_text":"Ciprian Chelba, David Grangier, Isaac Caswell","submitted_at":"2019-06-15T00:36:41Z","abstract_excerpt":"Recent work in Neural Machine Translation (NMT) has shown significant quality gains from noised-beam decoding during back-translation, a method to generate synthetic parallel data. We show that the main role of such synthetic noise is not to diversify the source side, as previously suggested, but simply to indicate to the model that the given source is synthetic. We propose a simpler alternative to noising techniques, consisting of tagging back-translated source sentences with an extra token. Our results on WMT outperform noised back-translation in English-Romanian and match performance on Eng"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1906.06442","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1906.06442","created_at":"2026-05-17T23:43:13.668773+00:00"},{"alias_kind":"arxiv_version","alias_value":"1906.06442v1","created_at":"2026-05-17T23:43:13.668773+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1906.06442","created_at":"2026-05-17T23:43:13.668773+00:00"},{"alias_kind":"pith_short_12","alias_value":"XOGAMZQ4LO5V","created_at":"2026-05-18T12:33:33.725879+00:00"},{"alias_kind":"pith_short_16","alias_value":"XOGAMZQ4LO5V74KK","created_at":"2026-05-18T12:33:33.725879+00:00"},{"alias_kind":"pith_short_8","alias_value":"XOGAMZQ4","created_at":"2026-05-18T12:33:33.725879+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":1,"internal_anchor_count":0,"sample":[{"citing_arxiv_id":"2401.10020","citing_title":"Self-Rewarding Language Models","ref_index":13,"is_internal_anchor":false}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/XOGAMZQ4LO5V74KK5SRPHO2QVP","json":"https://pith.science/pith/XOGAMZQ4LO5V74KK5SRPHO2QVP.json","graph_json":"https://pith.science/api/pith-number/XOGAMZQ4LO5V74KK5SRPHO2QVP/graph.json","events_json":"https://pith.science/api/pith-number/XOGAMZQ4LO5V74KK5SRPHO2QVP/events.json","paper":"https://pith.science/paper/XOGAMZQ4"},"agent_actions":{"view_html":"https://pith.science/pith/XOGAMZQ4LO5V74KK5SRPHO2QVP","download_json":"https://pith.science/pith/XOGAMZQ4LO5V74KK5SRPHO2QVP.json","view_paper":"https://pith.science/paper/XOGAMZQ4","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1906.06442&json=true","fetch_graph":"https://pith.science/api/pith-number/XOGAMZQ4LO5V74KK5SRPHO2QVP/graph.json","fetch_events":"https://pith.science/api/pith-number/XOGAMZQ4LO5V74KK5SRPHO2QVP/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/XOGAMZQ4LO5V74KK5SRPHO2QVP/action/timestamp_anchor","attest_storage":"https://pith.science/pith/XOGAMZQ4LO5V74KK5SRPHO2QVP/action/storage_attestation","attest_author":"https://pith.science/pith/XOGAMZQ4LO5V74KK5SRPHO2QVP/action/author_attestation","sign_citation":"https://pith.science/pith/XOGAMZQ4LO5V74KK5SRPHO2QVP/action/citation_signature","submit_replication":"https://pith.science/pith/XOGAMZQ4LO5V74KK5SRPHO2QVP/action/replication_record"}},"created_at":"2026-05-17T23:43:13.668773+00:00","updated_at":"2026-05-17T23:43:13.668773+00:00"}