{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2016:ZKNN3NXXDAOHLCCRVX7B3JVQQH","short_pith_number":"pith:ZKNN3NXX","canonical_record":{"source":{"id":"1606.01541","kind":"arxiv","version":4},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2016-06-05T17:59:23Z","cross_cats_sorted":[],"title_canon_sha256":"b7fa12768e632e3ce740adec0b037dd9d31668cdb1a9b6857ee9651ec2b9ee2d","abstract_canon_sha256":"6f8d23be3082e276cedfb090b9fa970a81fae57b8816f7fa9356231e6fd985aa"},"schema_version":"1.0"},"canonical_sha256":"ca9addb6f7181c758851adfe1da6b081fd8bb7b43776424a311404ae0b64b61f","source":{"kind":"arxiv","id":"1606.01541","version":4},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1606.01541","created_at":"2026-05-18T01:03:40Z"},{"alias_kind":"arxiv_version","alias_value":"1606.01541v4","created_at":"2026-05-18T01:03:40Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1606.01541","created_at":"2026-05-18T01:03:40Z"},{"alias_kind":"pith_short_12","alias_value":"ZKNN3NXXDAOH","created_at":"2026-05-18T12:30:53Z"},{"alias_kind":"pith_short_16","alias_value":"ZKNN3NXXDAOHLCCR","created_at":"2026-05-18T12:30:53Z"},{"alias_kind":"pith_short_8","alias_value":"ZKNN3NXX","created_at":"2026-05-18T12:30:53Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2016:ZKNN3NXXDAOHLCCRVX7B3JVQQH","target":"record","payload":{"canonical_record":{"source":{"id":"1606.01541","kind":"arxiv","version":4},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2016-06-05T17:59:23Z","cross_cats_sorted":[],"title_canon_sha256":"b7fa12768e632e3ce740adec0b037dd9d31668cdb1a9b6857ee9651ec2b9ee2d","abstract_canon_sha256":"6f8d23be3082e276cedfb090b9fa970a81fae57b8816f7fa9356231e6fd985aa"},"schema_version":"1.0"},"canonical_sha256":"ca9addb6f7181c758851adfe1da6b081fd8bb7b43776424a311404ae0b64b61f","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T01:03:40.609591Z","signature_b64":"l7vHoaDptQ+//cB2KRqFS3axBGuwrKokkh36412ho6K/g/6SqzAjmgokr/4qjQuNxddVk1SqqQWust5clBrgDQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"ca9addb6f7181c758851adfe1da6b081fd8bb7b43776424a311404ae0b64b61f","last_reissued_at":"2026-05-18T01:03:40.608863Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T01:03:40.608863Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1606.01541","source_version":4,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T01:03:40Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"lrOOh97bp7ZeYdGQgiz7IKMyyFaTRbxzAiOM8OGiApi0FQlCQjUo6a9Xabm+nqteOfLXqcBPQ0CUQojzkeYzAA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-03T00:11:42.957640Z"},"content_sha256":"ba7cea05b0e9e9132d26075ffeb7370caccb5b597a0287af048e86180f445d44","schema_version":"1.0","event_id":"sha256:ba7cea05b0e9e9132d26075ffeb7370caccb5b597a0287af048e86180f445d44"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2016:ZKNN3NXXDAOHLCCRVX7B3JVQQH","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Deep Reinforcement Learning for Dialogue Generation","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Alan Ritter, Dan Jurafsky, Jianfeng Gao, Jiwei Li, Michel Galley, Will Monroe","submitted_at":"2016-06-05T17:59:23Z","abstract_excerpt":"Recent neural models of dialogue generation offer great promise for generating responses for conversational agents, but tend to be shortsighted, predicting utterances one at a time while ignoring their influence on future outcomes. Modeling the future direction of a dialogue is crucial to generating coherent, interesting dialogues, a need which led traditional NLP models of dialogue to draw on reinforcement learning. In this paper, we show how to integrate these goals, applying deep reinforcement learning to model future reward in chatbot dialogue. The model simulates dialogues between two vir"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1606.01541","kind":"arxiv","version":4},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T01:03:40Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"0RornL+8GkREqsQ1Moi6ZFyWzSKhK0YX8dq5LC9N08wovpMsPv41K5e2GiTdMCGljUGqa2YaYPoe8F8VkQ2oDA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-03T00:11:42.957988Z"},"content_sha256":"7062f6ffdc22e3054f153d4c24958237ac5f494fdd532890eb491047a433583a","schema_version":"1.0","event_id":"sha256:7062f6ffdc22e3054f153d4c24958237ac5f494fdd532890eb491047a433583a"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/ZKNN3NXXDAOHLCCRVX7B3JVQQH/bundle.json","state_url":"https://pith.science/pith/ZKNN3NXXDAOHLCCRVX7B3JVQQH/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/ZKNN3NXXDAOHLCCRVX7B3JVQQH/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-03T00:11:42Z","links":{"resolver":"https://pith.science/pith/ZKNN3NXXDAOHLCCRVX7B3JVQQH","bundle":"https://pith.science/pith/ZKNN3NXXDAOHLCCRVX7B3JVQQH/bundle.json","state":"https://pith.science/pith/ZKNN3NXXDAOHLCCRVX7B3JVQQH/state.json","well_known_bundle":"https://pith.science/.well-known/pith/ZKNN3NXXDAOHLCCRVX7B3JVQQH/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2016:ZKNN3NXXDAOHLCCRVX7B3JVQQH","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"6f8d23be3082e276cedfb090b9fa970a81fae57b8816f7fa9356231e6fd985aa","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2016-06-05T17:59:23Z","title_canon_sha256":"b7fa12768e632e3ce740adec0b037dd9d31668cdb1a9b6857ee9651ec2b9ee2d"},"schema_version":"1.0","source":{"id":"1606.01541","kind":"arxiv","version":4}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1606.01541","created_at":"2026-05-18T01:03:40Z"},{"alias_kind":"arxiv_version","alias_value":"1606.01541v4","created_at":"2026-05-18T01:03:40Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1606.01541","created_at":"2026-05-18T01:03:40Z"},{"alias_kind":"pith_short_12","alias_value":"ZKNN3NXXDAOH","created_at":"2026-05-18T12:30:53Z"},{"alias_kind":"pith_short_16","alias_value":"ZKNN3NXXDAOHLCCR","created_at":"2026-05-18T12:30:53Z"},{"alias_kind":"pith_short_8","alias_value":"ZKNN3NXX","created_at":"2026-05-18T12:30:53Z"}],"graph_snapshots":[{"event_id":"sha256:7062f6ffdc22e3054f153d4c24958237ac5f494fdd532890eb491047a433583a","target":"graph","created_at":"2026-05-18T01:03:40Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Recent neural models of dialogue generation offer great promise for generating responses for conversational agents, but tend to be shortsighted, predicting utterances one at a time while ignoring their influence on future outcomes. Modeling the future direction of a dialogue is crucial to generating coherent, interesting dialogues, a need which led traditional NLP models of dialogue to draw on reinforcement learning. In this paper, we show how to integrate these goals, applying deep reinforcement learning to model future reward in chatbot dialogue. The model simulates dialogues between two vir","authors_text":"Alan Ritter, Dan Jurafsky, Jianfeng Gao, Jiwei Li, Michel Galley, Will Monroe","cross_cats":[],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2016-06-05T17:59:23Z","title":"Deep Reinforcement Learning for Dialogue Generation"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1606.01541","kind":"arxiv","version":4},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:ba7cea05b0e9e9132d26075ffeb7370caccb5b597a0287af048e86180f445d44","target":"record","created_at":"2026-05-18T01:03:40Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"6f8d23be3082e276cedfb090b9fa970a81fae57b8816f7fa9356231e6fd985aa","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2016-06-05T17:59:23Z","title_canon_sha256":"b7fa12768e632e3ce740adec0b037dd9d31668cdb1a9b6857ee9651ec2b9ee2d"},"schema_version":"1.0","source":{"id":"1606.01541","kind":"arxiv","version":4}},"canonical_sha256":"ca9addb6f7181c758851adfe1da6b081fd8bb7b43776424a311404ae0b64b61f","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"ca9addb6f7181c758851adfe1da6b081fd8bb7b43776424a311404ae0b64b61f","first_computed_at":"2026-05-18T01:03:40.608863Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T01:03:40.608863Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"l7vHoaDptQ+//cB2KRqFS3axBGuwrKokkh36412ho6K/g/6SqzAjmgokr/4qjQuNxddVk1SqqQWust5clBrgDQ==","signature_status":"signed_v1","signed_at":"2026-05-18T01:03:40.609591Z","signed_message":"canonical_sha256_bytes"},"source_id":"1606.01541","source_kind":"arxiv","source_version":4}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:ba7cea05b0e9e9132d26075ffeb7370caccb5b597a0287af048e86180f445d44","sha256:7062f6ffdc22e3054f153d4c24958237ac5f494fdd532890eb491047a433583a"],"state_sha256":"1fffc8258f0cc17d30cf86adfb784425e09d166cc85bad8cc075bcff19f5e6e4"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"4HVsCfxVnJD/A1JsSHhr/270HEGY12S6EaHkvYGK89+Ho2puGnHhUuRh5+u8n99ZMRH2+cZvye36cpzId6jvCA==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-03T00:11:42.960387Z","bundle_sha256":"ec123fbce459b10ba0b0e6c913ab6057321a1190ed9265074ab45b9c60e9f86d"}}