{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2017:SYL3LYBOXVGC2OTLXMQYS2ISU4","short_pith_number":"pith:SYL3LYBO","schema_version":"1.0","canonical_sha256":"9617b5e02ebd4c2d3a6bbb21896912a737095f1c6fb376d83c35e05a3f66f62d","source":{"kind":"arxiv","id":"1706.06210","version":2},"attestation_state":"computed","paper":{"title":"Sub-domain Modelling for Dialogue Management with Hierarchical Reinforcement Learning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.CL","authors_text":"I\\~nigo Casanueva, Lina Rojas-Barahona, Milica Ga\\v{s}i\\'c, Nikola Mrk\\v{s}i\\'c, Pawe{\\l} Budzianowski, Pei-Hao Su, Stefan Ultes, Tsung-Hsien Wen","submitted_at":"2017-06-19T23:15:22Z","abstract_excerpt":"Human conversation is inherently complex, often spanning many different topics/domains. This makes policy learning for dialogue systems very challenging. Standard flat reinforcement learning methods do not provide an efficient framework for modelling such dialogues. In this paper, we focus on the under-explored problem of multi-domain dialogue management. First, we propose a new method for hierarchical reinforcement learning using the option framework. Next, we show that the proposed architecture learns faster and arrives at a better policy than the existing flat ones do. Moreover, we show how"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1706.06210","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2017-06-19T23:15:22Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"b5b2ea7e02d22b05e60fbf900d4176d2089f9eeea74532249309fe421e38cd72","abstract_canon_sha256":"d85e014024fa83777e80bc7672ce0e48f4909804880fc8757eb2a27fa3f35e19"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:40:12.747608Z","signature_b64":"Q5etIDSyB/2xsdlXHff+S6TY3oekuNBAKep+U/AnEG40qjqPQnf8ImSxvJME1vvvuY/S4W8I/nCQzcTN7f4DCQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"9617b5e02ebd4c2d3a6bbb21896912a737095f1c6fb376d83c35e05a3f66f62d","last_reissued_at":"2026-05-18T00:40:12.746938Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:40:12.746938Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Sub-domain Modelling for Dialogue Management with Hierarchical Reinforcement Learning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.CL","authors_text":"I\\~nigo Casanueva, Lina Rojas-Barahona, Milica Ga\\v{s}i\\'c, Nikola Mrk\\v{s}i\\'c, Pawe{\\l} Budzianowski, Pei-Hao Su, Stefan Ultes, Tsung-Hsien Wen","submitted_at":"2017-06-19T23:15:22Z","abstract_excerpt":"Human conversation is inherently complex, often spanning many different topics/domains. This makes policy learning for dialogue systems very challenging. Standard flat reinforcement learning methods do not provide an efficient framework for modelling such dialogues. In this paper, we focus on the under-explored problem of multi-domain dialogue management. First, we propose a new method for hierarchical reinforcement learning using the option framework. Next, we show that the proposed architecture learns faster and arrives at a better policy than the existing flat ones do. Moreover, we show how"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1706.06210","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1706.06210","created_at":"2026-05-18T00:40:12.747049+00:00"},{"alias_kind":"arxiv_version","alias_value":"1706.06210v2","created_at":"2026-05-18T00:40:12.747049+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1706.06210","created_at":"2026-05-18T00:40:12.747049+00:00"},{"alias_kind":"pith_short_12","alias_value":"SYL3LYBOXVGC","created_at":"2026-05-18T12:31:43.269735+00:00"},{"alias_kind":"pith_short_16","alias_value":"SYL3LYBOXVGC2OTL","created_at":"2026-05-18T12:31:43.269735+00:00"},{"alias_kind":"pith_short_8","alias_value":"SYL3LYBO","created_at":"2026-05-18T12:31:43.269735+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/SYL3LYBOXVGC2OTLXMQYS2ISU4","json":"https://pith.science/pith/SYL3LYBOXVGC2OTLXMQYS2ISU4.json","graph_json":"https://pith.science/api/pith-number/SYL3LYBOXVGC2OTLXMQYS2ISU4/graph.json","events_json":"https://pith.science/api/pith-number/SYL3LYBOXVGC2OTLXMQYS2ISU4/events.json","paper":"https://pith.science/paper/SYL3LYBO"},"agent_actions":{"view_html":"https://pith.science/pith/SYL3LYBOXVGC2OTLXMQYS2ISU4","download_json":"https://pith.science/pith/SYL3LYBOXVGC2OTLXMQYS2ISU4.json","view_paper":"https://pith.science/paper/SYL3LYBO","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1706.06210&json=true","fetch_graph":"https://pith.science/api/pith-number/SYL3LYBOXVGC2OTLXMQYS2ISU4/graph.json","fetch_events":"https://pith.science/api/pith-number/SYL3LYBOXVGC2OTLXMQYS2ISU4/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/SYL3LYBOXVGC2OTLXMQYS2ISU4/action/timestamp_anchor","attest_storage":"https://pith.science/pith/SYL3LYBOXVGC2OTLXMQYS2ISU4/action/storage_attestation","attest_author":"https://pith.science/pith/SYL3LYBOXVGC2OTLXMQYS2ISU4/action/author_attestation","sign_citation":"https://pith.science/pith/SYL3LYBOXVGC2OTLXMQYS2ISU4/action/citation_signature","submit_replication":"https://pith.science/pith/SYL3LYBOXVGC2OTLXMQYS2ISU4/action/replication_record"}},"created_at":"2026-05-18T00:40:12.747049+00:00","updated_at":"2026-05-18T00:40:12.747049+00:00"}