{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2018:HGLHK3IHSK2CIYMA3QZVYUFW6B","short_pith_number":"pith:HGLHK3IH","schema_version":"1.0","canonical_sha256":"3996756d0792b4246180dc335c50b6f04a97bb5ee6cb578155dd94d3f2d20f2f","source":{"kind":"arxiv","id":"1811.09740","version":2},"attestation_state":"computed","paper":{"title":"Connecting the Dots Between MLE and RL for Sequence Prediction","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","cs.CL","stat.ML"],"primary_cat":"cs.LG","authors_text":"Bowen Tan, Eric Xing, Ruslan Salakhutdinov, Zhiting Hu, Zichao Yang","submitted_at":"2018-11-24T01:33:39Z","abstract_excerpt":"Sequence prediction models can be learned from example sequences with a variety of training algorithms. Maximum likelihood learning is simple and efficient, yet can suffer from compounding error at test time. Reinforcement learning such as policy gradient addresses the issue but can have prohibitively poor exploration efficiency. A rich set of other algorithms such as RAML, SPG, and data noising, have also been developed from different perspectives. This paper establishes a formal connection between these algorithms. We present a generalized entropy regularized policy optimization formulation,"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1811.09740","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-11-24T01:33:39Z","cross_cats_sorted":["cs.AI","cs.CL","stat.ML"],"title_canon_sha256":"0e4ecd781ae82654f70d8d19c692b7f00bb0c6b20c7e451e6b6ad993388bdf2f","abstract_canon_sha256":"7f7c7563d414b7e1c189e2e871f9df1d8414e58880ff442fd2395bb249049bf9"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:41:53.674630Z","signature_b64":"BJmxmmJ2NZj0p9GHCf0kwD02ZSuc8BzNRzBK2SR8aCSjj5un0GJDyyLomEhJV0KT7J8KZT6x/An9bsuqIkKOAQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"3996756d0792b4246180dc335c50b6f04a97bb5ee6cb578155dd94d3f2d20f2f","last_reissued_at":"2026-05-17T23:41:53.674158Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:41:53.674158Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Connecting the Dots Between MLE and RL for Sequence Prediction","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","cs.CL","stat.ML"],"primary_cat":"cs.LG","authors_text":"Bowen Tan, Eric Xing, Ruslan Salakhutdinov, Zhiting Hu, Zichao Yang","submitted_at":"2018-11-24T01:33:39Z","abstract_excerpt":"Sequence prediction models can be learned from example sequences with a variety of training algorithms. Maximum likelihood learning is simple and efficient, yet can suffer from compounding error at test time. Reinforcement learning such as policy gradient addresses the issue but can have prohibitively poor exploration efficiency. A rich set of other algorithms such as RAML, SPG, and data noising, have also been developed from different perspectives. This paper establishes a formal connection between these algorithms. We present a generalized entropy regularized policy optimization formulation,"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1811.09740","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1811.09740","created_at":"2026-05-17T23:41:53.674220+00:00"},{"alias_kind":"arxiv_version","alias_value":"1811.09740v2","created_at":"2026-05-17T23:41:53.674220+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1811.09740","created_at":"2026-05-17T23:41:53.674220+00:00"},{"alias_kind":"pith_short_12","alias_value":"HGLHK3IHSK2C","created_at":"2026-05-18T12:32:28.185984+00:00"},{"alias_kind":"pith_short_16","alias_value":"HGLHK3IHSK2CIYMA","created_at":"2026-05-18T12:32:28.185984+00:00"},{"alias_kind":"pith_short_8","alias_value":"HGLHK3IH","created_at":"2026-05-18T12:32:28.185984+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/HGLHK3IHSK2CIYMA3QZVYUFW6B","json":"https://pith.science/pith/HGLHK3IHSK2CIYMA3QZVYUFW6B.json","graph_json":"https://pith.science/api/pith-number/HGLHK3IHSK2CIYMA3QZVYUFW6B/graph.json","events_json":"https://pith.science/api/pith-number/HGLHK3IHSK2CIYMA3QZVYUFW6B/events.json","paper":"https://pith.science/paper/HGLHK3IH"},"agent_actions":{"view_html":"https://pith.science/pith/HGLHK3IHSK2CIYMA3QZVYUFW6B","download_json":"https://pith.science/pith/HGLHK3IHSK2CIYMA3QZVYUFW6B.json","view_paper":"https://pith.science/paper/HGLHK3IH","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1811.09740&json=true","fetch_graph":"https://pith.science/api/pith-number/HGLHK3IHSK2CIYMA3QZVYUFW6B/graph.json","fetch_events":"https://pith.science/api/pith-number/HGLHK3IHSK2CIYMA3QZVYUFW6B/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/HGLHK3IHSK2CIYMA3QZVYUFW6B/action/timestamp_anchor","attest_storage":"https://pith.science/pith/HGLHK3IHSK2CIYMA3QZVYUFW6B/action/storage_attestation","attest_author":"https://pith.science/pith/HGLHK3IHSK2CIYMA3QZVYUFW6B/action/author_attestation","sign_citation":"https://pith.science/pith/HGLHK3IHSK2CIYMA3QZVYUFW6B/action/citation_signature","submit_replication":"https://pith.science/pith/HGLHK3IHSK2CIYMA3QZVYUFW6B/action/replication_record"}},"created_at":"2026-05-17T23:41:53.674220+00:00","updated_at":"2026-05-17T23:41:53.674220+00:00"}