{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2017:WLHOYRRI6QMBPMG27HHHPDEDDP","short_pith_number":"pith:WLHOYRRI","schema_version":"1.0","canonical_sha256":"b2ceec4628f41817b0daf9ce778c831bec53387b42875695f9e67c56d0468bc4","source":{"kind":"arxiv","id":"1705.04304","version":3},"attestation_state":"computed","paper":{"title":"A Deep Reinforced Model for Abstractive Summarization","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Caiming Xiong, Richard Socher, Romain Paulus","submitted_at":"2017-05-11T17:39:35Z","abstract_excerpt":"Attentional, RNN-based encoder-decoder models for abstractive summarization have achieved good performance on short input and output sequences. For longer documents and summaries however these models often include repetitive and incoherent phrases. We introduce a neural network model with a novel intra-attention that attends over the input and continuously generated output separately, and a new training method that combines standard supervised word prediction and reinforcement learning (RL). Models trained only with supervised learning often exhibit \"exposure bias\" - they assume ground truth i"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1705.04304","kind":"arxiv","version":3},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2017-05-11T17:39:35Z","cross_cats_sorted":[],"title_canon_sha256":"371f5643d159a9daec925add0dc795f3e90be32c4d5c9b3ebc5d3a2428793c3f","abstract_canon_sha256":"0cae99058ce7213d67dd2a92432286e6efda84bf70181e0d52b86b94c49a3bde"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:30:44.866870Z","signature_b64":"7luZHhi+jxMuGff+HYINWI/PrdOO2GoQ/OStloLUd4VzZJ9H3aBcEJs3Zj+4KFvPVdelCYpAK6cM+IH3ToCwBQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"b2ceec4628f41817b0daf9ce778c831bec53387b42875695f9e67c56d0468bc4","last_reissued_at":"2026-05-18T00:30:44.866246Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:30:44.866246Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"A Deep Reinforced Model for Abstractive Summarization","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Caiming Xiong, Richard Socher, Romain Paulus","submitted_at":"2017-05-11T17:39:35Z","abstract_excerpt":"Attentional, RNN-based encoder-decoder models for abstractive summarization have achieved good performance on short input and output sequences. For longer documents and summaries however these models often include repetitive and incoherent phrases. We introduce a neural network model with a novel intra-attention that attends over the input and continuously generated output separately, and a new training method that combines standard supervised word prediction and reinforcement learning (RL). Models trained only with supervised learning often exhibit \"exposure bias\" - they assume ground truth i"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1705.04304","kind":"arxiv","version":3},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1705.04304","created_at":"2026-05-18T00:30:44.866333+00:00"},{"alias_kind":"arxiv_version","alias_value":"1705.04304v3","created_at":"2026-05-18T00:30:44.866333+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1705.04304","created_at":"2026-05-18T00:30:44.866333+00:00"},{"alias_kind":"pith_short_12","alias_value":"WLHOYRRI6QMB","created_at":"2026-05-18T12:31:53.515858+00:00"},{"alias_kind":"pith_short_16","alias_value":"WLHOYRRI6QMBPMG2","created_at":"2026-05-18T12:31:53.515858+00:00"},{"alias_kind":"pith_short_8","alias_value":"WLHOYRRI","created_at":"2026-05-18T12:31:53.515858+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":6,"internal_anchor_count":2,"sample":[{"citing_arxiv_id":"2009.01325","citing_title":"Learning to summarize from human feedback","ref_index":45,"is_internal_anchor":true},{"citing_arxiv_id":"2409.12917","citing_title":"Training Language Models to Self-Correct via Reinforcement Learning","ref_index":164,"is_internal_anchor":true},{"citing_arxiv_id":"1912.06680","citing_title":"Dota 2 with Large Scale Deep Reinforcement Learning","ref_index":7,"is_internal_anchor":false},{"citing_arxiv_id":"1910.10683","citing_title":"Exploring the Limits of Transfer Learning with a Unified Text-to-Text Transformer","ref_index":50,"is_internal_anchor":false},{"citing_arxiv_id":"1909.08593","citing_title":"Fine-Tuning Language Models from Human Preferences","ref_index":19,"is_internal_anchor":false},{"citing_arxiv_id":"1706.03762","citing_title":"Attention Is All You Need","ref_index":28,"is_internal_anchor":false}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/WLHOYRRI6QMBPMG27HHHPDEDDP","json":"https://pith.science/pith/WLHOYRRI6QMBPMG27HHHPDEDDP.json","graph_json":"https://pith.science/api/pith-number/WLHOYRRI6QMBPMG27HHHPDEDDP/graph.json","events_json":"https://pith.science/api/pith-number/WLHOYRRI6QMBPMG27HHHPDEDDP/events.json","paper":"https://pith.science/paper/WLHOYRRI"},"agent_actions":{"view_html":"https://pith.science/pith/WLHOYRRI6QMBPMG27HHHPDEDDP","download_json":"https://pith.science/pith/WLHOYRRI6QMBPMG27HHHPDEDDP.json","view_paper":"https://pith.science/paper/WLHOYRRI","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1705.04304&json=true","fetch_graph":"https://pith.science/api/pith-number/WLHOYRRI6QMBPMG27HHHPDEDDP/graph.json","fetch_events":"https://pith.science/api/pith-number/WLHOYRRI6QMBPMG27HHHPDEDDP/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/WLHOYRRI6QMBPMG27HHHPDEDDP/action/timestamp_anchor","attest_storage":"https://pith.science/pith/WLHOYRRI6QMBPMG27HHHPDEDDP/action/storage_attestation","attest_author":"https://pith.science/pith/WLHOYRRI6QMBPMG27HHHPDEDDP/action/author_attestation","sign_citation":"https://pith.science/pith/WLHOYRRI6QMBPMG27HHHPDEDDP/action/citation_signature","submit_replication":"https://pith.science/pith/WLHOYRRI6QMBPMG27HHHPDEDDP/action/replication_record"}},"created_at":"2026-05-18T00:30:44.866333+00:00","updated_at":"2026-05-18T00:30:44.866333+00:00"}