{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2018:3DBLONQJRK53TERQVYUEROCQP6","short_pith_number":"pith:3DBLONQJ","canonical_record":{"source":{"id":"1802.03753","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2018-02-11T15:37:37Z","cross_cats_sorted":["cs.AI","cs.LG","stat.ML"],"title_canon_sha256":"9d457ff70782f7d637bef20486574880d77aeba5fa8a120ce0050dfc2a7b03d9","abstract_canon_sha256":"98719512e1b098c3253801f4c84e8a1996ebcc0a62b3ff969b4de8c6aaf3bb91"},"schema_version":"1.0"},"canonical_sha256":"d8c2b736098abbb99230ae2848b8507fada566eeac8133179269019634d7af5a","source":{"kind":"arxiv","id":"1802.03753","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1802.03753","created_at":"2026-05-18T00:23:50Z"},{"alias_kind":"arxiv_version","alias_value":"1802.03753v1","created_at":"2026-05-18T00:23:50Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1802.03753","created_at":"2026-05-18T00:23:50Z"},{"alias_kind":"pith_short_12","alias_value":"3DBLONQJRK53","created_at":"2026-05-18T12:32:02Z"},{"alias_kind":"pith_short_16","alias_value":"3DBLONQJRK53TERQ","created_at":"2026-05-18T12:32:02Z"},{"alias_kind":"pith_short_8","alias_value":"3DBLONQJ","created_at":"2026-05-18T12:32:02Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2018:3DBLONQJRK53TERQVYUEROCQP6","target":"record","payload":{"canonical_record":{"source":{"id":"1802.03753","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2018-02-11T15:37:37Z","cross_cats_sorted":["cs.AI","cs.LG","stat.ML"],"title_canon_sha256":"9d457ff70782f7d637bef20486574880d77aeba5fa8a120ce0050dfc2a7b03d9","abstract_canon_sha256":"98719512e1b098c3253801f4c84e8a1996ebcc0a62b3ff969b4de8c6aaf3bb91"},"schema_version":"1.0"},"canonical_sha256":"d8c2b736098abbb99230ae2848b8507fada566eeac8133179269019634d7af5a","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:23:50.567434Z","signature_b64":"j3JXdnOjElMDHrhBDS2bNbwNdERsN8g6WV53q7kCEzR8hAdx6nSzhcFgyjIt5SBcv4FiDKVQrHbzr7RDyyEDDA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"d8c2b736098abbb99230ae2848b8507fada566eeac8133179269019634d7af5a","last_reissued_at":"2026-05-18T00:23:50.566891Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:23:50.566891Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1802.03753","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:23:50Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"46eSG5B5RqwG2PeIRCT6kNlGMbONzr5Eao3BDeLoypJ7y1yrcB/hhwXoEpbYJMLgA6dlYfLDRw4ufidsOI8cAQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-07T07:12:45.333475Z"},"content_sha256":"922dd470b7e4a0d864e3b0f99dc79a970144873c57a740afb2ac167542260c76","schema_version":"1.0","event_id":"sha256:922dd470b7e4a0d864e3b0f99dc79a970144873c57a740afb2ac167542260c76"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2018:3DBLONQJRK53TERQVYUEROCQP6","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Sample Efficient Deep Reinforcement Learning for Dialogue Systems with Large Action Spaces","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","cs.LG","stat.ML"],"primary_cat":"cs.CL","authors_text":"Gell\\'ert Weisz, Milica Ga\\v{s}i\\'c, Pawe{\\l} Budzianowski, Pei-Hao Su","submitted_at":"2018-02-11T15:37:37Z","abstract_excerpt":"In spoken dialogue systems, we aim to deploy artificial intelligence to build automated dialogue agents that can converse with humans. A part of this effort is the policy optimisation task, which attempts to find a policy describing how to respond to humans, in the form of a function taking the current state of the dialogue and returning the response of the system. In this paper, we investigate deep reinforcement learning approaches to solve this problem. Particular attention is given to actor-critic methods, off-policy reinforcement learning with experience replay, and various methods aimed a"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1802.03753","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:23:50Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"Y93ti38xhpuv/KHGAPrtq4XTn6UzH09HYH4DhzrU12ojwSbvkHUwOzxhtxPjXefl1lYbTQII5azeNJijMhBjAg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-07T07:12:45.334073Z"},"content_sha256":"1434b261156207e2f9e82adb622090df263ed22f1ac81331bd9fb3977138b2be","schema_version":"1.0","event_id":"sha256:1434b261156207e2f9e82adb622090df263ed22f1ac81331bd9fb3977138b2be"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/3DBLONQJRK53TERQVYUEROCQP6/bundle.json","state_url":"https://pith.science/pith/3DBLONQJRK53TERQVYUEROCQP6/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/3DBLONQJRK53TERQVYUEROCQP6/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-07T07:12:45Z","links":{"resolver":"https://pith.science/pith/3DBLONQJRK53TERQVYUEROCQP6","bundle":"https://pith.science/pith/3DBLONQJRK53TERQVYUEROCQP6/bundle.json","state":"https://pith.science/pith/3DBLONQJRK53TERQVYUEROCQP6/state.json","well_known_bundle":"https://pith.science/.well-known/pith/3DBLONQJRK53TERQVYUEROCQP6/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2018:3DBLONQJRK53TERQVYUEROCQP6","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"98719512e1b098c3253801f4c84e8a1996ebcc0a62b3ff969b4de8c6aaf3bb91","cross_cats_sorted":["cs.AI","cs.LG","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2018-02-11T15:37:37Z","title_canon_sha256":"9d457ff70782f7d637bef20486574880d77aeba5fa8a120ce0050dfc2a7b03d9"},"schema_version":"1.0","source":{"id":"1802.03753","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1802.03753","created_at":"2026-05-18T00:23:50Z"},{"alias_kind":"arxiv_version","alias_value":"1802.03753v1","created_at":"2026-05-18T00:23:50Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1802.03753","created_at":"2026-05-18T00:23:50Z"},{"alias_kind":"pith_short_12","alias_value":"3DBLONQJRK53","created_at":"2026-05-18T12:32:02Z"},{"alias_kind":"pith_short_16","alias_value":"3DBLONQJRK53TERQ","created_at":"2026-05-18T12:32:02Z"},{"alias_kind":"pith_short_8","alias_value":"3DBLONQJ","created_at":"2026-05-18T12:32:02Z"}],"graph_snapshots":[{"event_id":"sha256:1434b261156207e2f9e82adb622090df263ed22f1ac81331bd9fb3977138b2be","target":"graph","created_at":"2026-05-18T00:23:50Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"In spoken dialogue systems, we aim to deploy artificial intelligence to build automated dialogue agents that can converse with humans. A part of this effort is the policy optimisation task, which attempts to find a policy describing how to respond to humans, in the form of a function taking the current state of the dialogue and returning the response of the system. In this paper, we investigate deep reinforcement learning approaches to solve this problem. Particular attention is given to actor-critic methods, off-policy reinforcement learning with experience replay, and various methods aimed a","authors_text":"Gell\\'ert Weisz, Milica Ga\\v{s}i\\'c, Pawe{\\l} Budzianowski, Pei-Hao Su","cross_cats":["cs.AI","cs.LG","stat.ML"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2018-02-11T15:37:37Z","title":"Sample Efficient Deep Reinforcement Learning for Dialogue Systems with Large Action Spaces"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1802.03753","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:922dd470b7e4a0d864e3b0f99dc79a970144873c57a740afb2ac167542260c76","target":"record","created_at":"2026-05-18T00:23:50Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"98719512e1b098c3253801f4c84e8a1996ebcc0a62b3ff969b4de8c6aaf3bb91","cross_cats_sorted":["cs.AI","cs.LG","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2018-02-11T15:37:37Z","title_canon_sha256":"9d457ff70782f7d637bef20486574880d77aeba5fa8a120ce0050dfc2a7b03d9"},"schema_version":"1.0","source":{"id":"1802.03753","kind":"arxiv","version":1}},"canonical_sha256":"d8c2b736098abbb99230ae2848b8507fada566eeac8133179269019634d7af5a","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"d8c2b736098abbb99230ae2848b8507fada566eeac8133179269019634d7af5a","first_computed_at":"2026-05-18T00:23:50.566891Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:23:50.566891Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"j3JXdnOjElMDHrhBDS2bNbwNdERsN8g6WV53q7kCEzR8hAdx6nSzhcFgyjIt5SBcv4FiDKVQrHbzr7RDyyEDDA==","signature_status":"signed_v1","signed_at":"2026-05-18T00:23:50.567434Z","signed_message":"canonical_sha256_bytes"},"source_id":"1802.03753","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:922dd470b7e4a0d864e3b0f99dc79a970144873c57a740afb2ac167542260c76","sha256:1434b261156207e2f9e82adb622090df263ed22f1ac81331bd9fb3977138b2be"],"state_sha256":"b6b7bd58b15a2292066aa72dc86b567204591216c1619ff7d9113635f56e938a"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"LA94rCdJM4VTIhg42utTOlbuxpQlIF8WydLmDWimLBgWZDPBAS7BMCSoEwKIe8N/S2eupjJRLoW7nzME05zZAg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-07T07:12:45.337346Z","bundle_sha256":"a543e86a0f8374e0c6dd0bf95817c4e6efbed2db64c4ce3400277b67b803b60b"}}