{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2018:CK4R7VSS44AH2XL726GRSND4HG","short_pith_number":"pith:CK4R7VSS","schema_version":"1.0","canonical_sha256":"12b91fd652e7007d5d7fd78d19347c399627028885c169cf48c1deb5f19a42f3","source":{"kind":"arxiv","id":"1809.00125","version":2},"attestation_state":"computed","paper":{"title":"Simple Fusion: Return of the Language Model","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Felix Stahlberg, James Cross, Veselin Stoyanov","submitted_at":"2018-09-01T06:39:56Z","abstract_excerpt":"Neural Machine Translation (NMT) typically leverages monolingual data in training through backtranslation. We investigate an alternative simple method to use monolingual data for NMT training: We combine the scores of a pre-trained and fixed language model (LM) with the scores of a translation model (TM) while the TM is trained from scratch. To achieve that, we train the translation model to predict the residual probability of the training data added to the prediction of the LM. This enables the TM to focus its capacity on modeling the source sentence since it can rely on the LM for fluency. W"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1809.00125","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2018-09-01T06:39:56Z","cross_cats_sorted":[],"title_canon_sha256":"fa92eccb4d35371e06d6b861d09538595a7462bef076a931316550e6dcf77f51","abstract_canon_sha256":"8175284b81b774d9f81e4ddc1bc9b7b6af579e4c0bbcee0c376e88ebf0c3ac5b"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:55:38.395352Z","signature_b64":"oeTtSIPtd70ahzsYHLqTetugbKv9tUutgDLj4ClBLjh3GnJVOmLiSScZgeR2lA1EPaYKmD/P9+b4s5frVeHfDQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"12b91fd652e7007d5d7fd78d19347c399627028885c169cf48c1deb5f19a42f3","last_reissued_at":"2026-05-17T23:55:38.394952Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:55:38.394952Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Simple Fusion: Return of the Language Model","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Felix Stahlberg, James Cross, Veselin Stoyanov","submitted_at":"2018-09-01T06:39:56Z","abstract_excerpt":"Neural Machine Translation (NMT) typically leverages monolingual data in training through backtranslation. We investigate an alternative simple method to use monolingual data for NMT training: We combine the scores of a pre-trained and fixed language model (LM) with the scores of a translation model (TM) while the TM is trained from scratch. To achieve that, we train the translation model to predict the residual probability of the training data added to the prediction of the LM. This enables the TM to focus its capacity on modeling the source sentence since it can rely on the LM for fluency. W"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1809.00125","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1809.00125","created_at":"2026-05-17T23:55:38.395021+00:00"},{"alias_kind":"arxiv_version","alias_value":"1809.00125v2","created_at":"2026-05-17T23:55:38.395021+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1809.00125","created_at":"2026-05-17T23:55:38.395021+00:00"},{"alias_kind":"pith_short_12","alias_value":"CK4R7VSS44AH","created_at":"2026-05-18T12:32:16.446611+00:00"},{"alias_kind":"pith_short_16","alias_value":"CK4R7VSS44AH2XL7","created_at":"2026-05-18T12:32:16.446611+00:00"},{"alias_kind":"pith_short_8","alias_value":"CK4R7VSS","created_at":"2026-05-18T12:32:16.446611+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/CK4R7VSS44AH2XL726GRSND4HG","json":"https://pith.science/pith/CK4R7VSS44AH2XL726GRSND4HG.json","graph_json":"https://pith.science/api/pith-number/CK4R7VSS44AH2XL726GRSND4HG/graph.json","events_json":"https://pith.science/api/pith-number/CK4R7VSS44AH2XL726GRSND4HG/events.json","paper":"https://pith.science/paper/CK4R7VSS"},"agent_actions":{"view_html":"https://pith.science/pith/CK4R7VSS44AH2XL726GRSND4HG","download_json":"https://pith.science/pith/CK4R7VSS44AH2XL726GRSND4HG.json","view_paper":"https://pith.science/paper/CK4R7VSS","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1809.00125&json=true","fetch_graph":"https://pith.science/api/pith-number/CK4R7VSS44AH2XL726GRSND4HG/graph.json","fetch_events":"https://pith.science/api/pith-number/CK4R7VSS44AH2XL726GRSND4HG/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/CK4R7VSS44AH2XL726GRSND4HG/action/timestamp_anchor","attest_storage":"https://pith.science/pith/CK4R7VSS44AH2XL726GRSND4HG/action/storage_attestation","attest_author":"https://pith.science/pith/CK4R7VSS44AH2XL726GRSND4HG/action/author_attestation","sign_citation":"https://pith.science/pith/CK4R7VSS44AH2XL726GRSND4HG/action/citation_signature","submit_replication":"https://pith.science/pith/CK4R7VSS44AH2XL726GRSND4HG/action/replication_record"}},"created_at":"2026-05-17T23:55:38.395021+00:00","updated_at":"2026-05-17T23:55:38.395021+00:00"}