{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2019:KGLWLR3V5BDNN6TLEGVFMENTXF","short_pith_number":"pith:KGLWLR3V","schema_version":"1.0","canonical_sha256":"519765c775e846d6fa6b21aa5611b3b957f83e218d8292681410978b995485f0","source":{"kind":"arxiv","id":"1902.08858","version":2},"attestation_state":"computed","paper":{"title":"Rethinking Action Spaces for Reinforcement Learning in End-to-end Dialog Agents with Latent Variable Models","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.CL","authors_text":"Kaige Xie, Maxine Eskenazi, Tiancheng Zhao","submitted_at":"2019-02-23T22:27:45Z","abstract_excerpt":"Defining action spaces for conversational agents and optimizing their decision-making process with reinforcement learning is an enduring challenge. Common practice has been to use handcrafted dialog acts, or the output vocabulary, e.g. in neural encoder decoders, as the action spaces. Both have their own limitations. This paper proposes a novel latent action framework that treats the action spaces of an end-to-end dialog agent as latent variables and develops unsupervised methods in order to induce its own action space from the data. Comprehensive experiments are conducted examining both conti"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1902.08858","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2019-02-23T22:27:45Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"f0e99c336f33599dc0b83bd01bc145df3067bd5bb3a5d49be9b6b52795aeb83f","abstract_canon_sha256":"ab127504599354181bcf00eed6bcdae6746c9b57e90223d46dddb63c6c8cb13a"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:48:39.458415Z","signature_b64":"RVdurd0Lgimraauq0E9y69E7NfGEHOb9/pPrFoma7ELiyxTCmmJp6YEb8d434HNYjBlWdVig/HcFnpbguBW8AA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"519765c775e846d6fa6b21aa5611b3b957f83e218d8292681410978b995485f0","last_reissued_at":"2026-05-17T23:48:39.457867Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:48:39.457867Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Rethinking Action Spaces for Reinforcement Learning in End-to-end Dialog Agents with Latent Variable Models","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.CL","authors_text":"Kaige Xie, Maxine Eskenazi, Tiancheng Zhao","submitted_at":"2019-02-23T22:27:45Z","abstract_excerpt":"Defining action spaces for conversational agents and optimizing their decision-making process with reinforcement learning is an enduring challenge. Common practice has been to use handcrafted dialog acts, or the output vocabulary, e.g. in neural encoder decoders, as the action spaces. Both have their own limitations. This paper proposes a novel latent action framework that treats the action spaces of an end-to-end dialog agent as latent variables and develops unsupervised methods in order to induce its own action space from the data. Comprehensive experiments are conducted examining both conti"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1902.08858","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1902.08858","created_at":"2026-05-17T23:48:39.457952+00:00"},{"alias_kind":"arxiv_version","alias_value":"1902.08858v2","created_at":"2026-05-17T23:48:39.457952+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1902.08858","created_at":"2026-05-17T23:48:39.457952+00:00"},{"alias_kind":"pith_short_12","alias_value":"KGLWLR3V5BDN","created_at":"2026-05-18T12:33:21.387695+00:00"},{"alias_kind":"pith_short_16","alias_value":"KGLWLR3V5BDNN6TL","created_at":"2026-05-18T12:33:21.387695+00:00"},{"alias_kind":"pith_short_8","alias_value":"KGLWLR3V","created_at":"2026-05-18T12:33:21.387695+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":3,"internal_anchor_count":2,"sample":[{"citing_arxiv_id":"2605.14057","citing_title":"Dual Hierarchical Dialogue Policy Learning for Legal Inquisitive Conversational Agents","ref_index":67,"is_internal_anchor":true},{"citing_arxiv_id":"2605.14057","citing_title":"Dual Hierarchical Dialogue Policy Learning for Legal Inquisitive Conversational Agents","ref_index":67,"is_internal_anchor":true},{"citing_arxiv_id":"2604.18354","citing_title":"PRISMA: Preference-Reinforced Self-Training Approach for Interpretable Emotionally Intelligent Negotiation Dialogues","ref_index":14,"is_internal_anchor":false}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/KGLWLR3V5BDNN6TLEGVFMENTXF","json":"https://pith.science/pith/KGLWLR3V5BDNN6TLEGVFMENTXF.json","graph_json":"https://pith.science/api/pith-number/KGLWLR3V5BDNN6TLEGVFMENTXF/graph.json","events_json":"https://pith.science/api/pith-number/KGLWLR3V5BDNN6TLEGVFMENTXF/events.json","paper":"https://pith.science/paper/KGLWLR3V"},"agent_actions":{"view_html":"https://pith.science/pith/KGLWLR3V5BDNN6TLEGVFMENTXF","download_json":"https://pith.science/pith/KGLWLR3V5BDNN6TLEGVFMENTXF.json","view_paper":"https://pith.science/paper/KGLWLR3V","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1902.08858&json=true","fetch_graph":"https://pith.science/api/pith-number/KGLWLR3V5BDNN6TLEGVFMENTXF/graph.json","fetch_events":"https://pith.science/api/pith-number/KGLWLR3V5BDNN6TLEGVFMENTXF/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/KGLWLR3V5BDNN6TLEGVFMENTXF/action/timestamp_anchor","attest_storage":"https://pith.science/pith/KGLWLR3V5BDNN6TLEGVFMENTXF/action/storage_attestation","attest_author":"https://pith.science/pith/KGLWLR3V5BDNN6TLEGVFMENTXF/action/author_attestation","sign_citation":"https://pith.science/pith/KGLWLR3V5BDNN6TLEGVFMENTXF/action/citation_signature","submit_replication":"https://pith.science/pith/KGLWLR3V5BDNN6TLEGVFMENTXF/action/replication_record"}},"created_at":"2026-05-17T23:48:39.457952+00:00","updated_at":"2026-05-17T23:48:39.457952+00:00"}