{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2018:PDRQZQYHW2DOAODCP2YK4Y4WIG","short_pith_number":"pith:PDRQZQYH","schema_version":"1.0","canonical_sha256":"78e30cc307b686e038627eb0ae63964195ca34779d3b760bfd4a740e92aebd64","source":{"kind":"arxiv","id":"1808.08866","version":1},"attestation_state":"computed","paper":{"title":"A Study of Reinforcement Learning for Neural Machine Translation","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","stat.ML"],"primary_cat":"cs.LG","authors_text":"Fei Tian, Jianhuang Lai, Lijun Wu, Tao Qin, Tie-Yan Liu","submitted_at":"2018-08-27T14:43:38Z","abstract_excerpt":"Recent studies have shown that reinforcement learning (RL) is an effective approach for improving the performance of neural machine translation (NMT) system. However, due to its instability, successfully RL training is challenging, especially in real-world systems where deep models and large datasets are leveraged. In this paper, taking several large-scale translation tasks as testbeds, we conduct a systematic study on how to train better NMT models using reinforcement learning. We provide a comprehensive comparison of several important factors (e.g., baseline reward, reward shaping) in RL tra"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1808.08866","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-08-27T14:43:38Z","cross_cats_sorted":["cs.AI","stat.ML"],"title_canon_sha256":"bdb1ee28b6e0fe1cc7f7ddb914912f76c8ef94ff1528d4f6f16334a10884200d","abstract_canon_sha256":"d6ab28c3fb398b82636f310d6f1a3a8bad6dc58c0c693c7c96d02d588ec097ac"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:07:12.997714Z","signature_b64":"RsnvOnZAPAvEXBbAVaxlZdD/15L/IKkMkTOXh+j1kvxcefZ1WhZDNFk1u5uvFpQcWitKwCjt5Zcuf0SzPj37Cg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"78e30cc307b686e038627eb0ae63964195ca34779d3b760bfd4a740e92aebd64","last_reissued_at":"2026-05-18T00:07:12.997223Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:07:12.997223Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"A Study of Reinforcement Learning for Neural Machine Translation","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","stat.ML"],"primary_cat":"cs.LG","authors_text":"Fei Tian, Jianhuang Lai, Lijun Wu, Tao Qin, Tie-Yan Liu","submitted_at":"2018-08-27T14:43:38Z","abstract_excerpt":"Recent studies have shown that reinforcement learning (RL) is an effective approach for improving the performance of neural machine translation (NMT) system. However, due to its instability, successfully RL training is challenging, especially in real-world systems where deep models and large datasets are leveraged. In this paper, taking several large-scale translation tasks as testbeds, we conduct a systematic study on how to train better NMT models using reinforcement learning. We provide a comprehensive comparison of several important factors (e.g., baseline reward, reward shaping) in RL tra"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1808.08866","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1808.08866","created_at":"2026-05-18T00:07:12.997299+00:00"},{"alias_kind":"arxiv_version","alias_value":"1808.08866v1","created_at":"2026-05-18T00:07:12.997299+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1808.08866","created_at":"2026-05-18T00:07:12.997299+00:00"},{"alias_kind":"pith_short_12","alias_value":"PDRQZQYHW2DO","created_at":"2026-05-18T12:32:43.782077+00:00"},{"alias_kind":"pith_short_16","alias_value":"PDRQZQYHW2DOAODC","created_at":"2026-05-18T12:32:43.782077+00:00"},{"alias_kind":"pith_short_8","alias_value":"PDRQZQYH","created_at":"2026-05-18T12:32:43.782077+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":2,"internal_anchor_count":2,"sample":[{"citing_arxiv_id":"1906.09444","citing_title":"Retrieving Sequential Information for Non-Autoregressive Neural Machine Translation","ref_index":35,"is_internal_anchor":true},{"citing_arxiv_id":"2509.03526","citing_title":"Enhancing Speech Large Language Models through Reinforced Behavior Alignment","ref_index":51,"is_internal_anchor":true}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/PDRQZQYHW2DOAODCP2YK4Y4WIG","json":"https://pith.science/pith/PDRQZQYHW2DOAODCP2YK4Y4WIG.json","graph_json":"https://pith.science/api/pith-number/PDRQZQYHW2DOAODCP2YK4Y4WIG/graph.json","events_json":"https://pith.science/api/pith-number/PDRQZQYHW2DOAODCP2YK4Y4WIG/events.json","paper":"https://pith.science/paper/PDRQZQYH"},"agent_actions":{"view_html":"https://pith.science/pith/PDRQZQYHW2DOAODCP2YK4Y4WIG","download_json":"https://pith.science/pith/PDRQZQYHW2DOAODCP2YK4Y4WIG.json","view_paper":"https://pith.science/paper/PDRQZQYH","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1808.08866&json=true","fetch_graph":"https://pith.science/api/pith-number/PDRQZQYHW2DOAODCP2YK4Y4WIG/graph.json","fetch_events":"https://pith.science/api/pith-number/PDRQZQYHW2DOAODCP2YK4Y4WIG/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/PDRQZQYHW2DOAODCP2YK4Y4WIG/action/timestamp_anchor","attest_storage":"https://pith.science/pith/PDRQZQYHW2DOAODCP2YK4Y4WIG/action/storage_attestation","attest_author":"https://pith.science/pith/PDRQZQYHW2DOAODCP2YK4Y4WIG/action/author_attestation","sign_citation":"https://pith.science/pith/PDRQZQYHW2DOAODCP2YK4Y4WIG/action/citation_signature","submit_replication":"https://pith.science/pith/PDRQZQYHW2DOAODCP2YK4Y4WIG/action/replication_record"}},"created_at":"2026-05-18T00:07:12.997299+00:00","updated_at":"2026-05-18T00:07:12.997299+00:00"}