{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:QLSM76VMA5KPRGJZMTHSLJUUJB","merge_version":"pith-open-graph-merge-v1","event_count":3,"valid_event_count":3,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"888d9dabcc3bda53c71fc2b98d89ea7e1f39f70a1fd7380d95bbcfa936e07d9e","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2026-05-13T19:29:11Z","title_canon_sha256":"9b56ec121339d94c129fa3c84b0c6d81d58b1987b84b25379563b9d2a64b3eae"},"schema_version":"1.0","source":{"id":"2605.14057","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.14057","created_at":"2026-05-17T23:39:12Z"},{"alias_kind":"arxiv_version","alias_value":"2605.14057v1","created_at":"2026-05-17T23:39:12Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.14057","created_at":"2026-05-17T23:39:12Z"},{"alias_kind":"pith_short_12","alias_value":"QLSM76VMA5KP","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_16","alias_value":"QLSM76VMA5KPRGJZ","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_8","alias_value":"QLSM76VM","created_at":"2026-05-18T12:33:37Z"}],"graph_snapshots":[{"event_id":"sha256:31f35ec907812a01ac96bce5679aa3e804a805b0d469716563360ab259c147d7","target":"graph","created_at":"2026-05-17T23:39:12Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"Evaluations on a U.S. Supreme Court dataset show that our method outperforms various baselines across multiple metrics."},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"The U.S. Supreme Court dataset captures representative judicial questioning patterns and that the dual RL agents can learn effective strategies aligned with legal objectives without additional human feedback or validation."},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"A dual hierarchical RL framework lets agents learn when and how to ask probing questions in U.S. Supreme Court arguments, outperforming baselines on a court dataset."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"A dual hierarchical reinforcement learning method lets conversational agents proactively extract information by coordinating high-level strategy and low-level question generation in legal dialogues."}],"snapshot_sha256":"d067827b7a8d05253bdd05c232bd547b576be46fb58f4b7fd76e0a0fb29bad63"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Most existing dialogue systems are user-driven, primarily designed to fulfill user requests. However, in many critical real-world scenarios, a conversational agent must proactively extract information to achieve its own objectives rather than merely respond. To address this gap, we introduce \\emph{Inquisitive Conversational Agents (ICAs)} and develop an ICA specifically tailored to U.S. Supreme Court oral arguments. We propose a Dual Hierarchical Reinforcement Learning framework featuring two cooperating RL agents, each with its own policy, to coordinate strategic dialogue management and fine-","authors_text":"Grace Hui Yang, Shihao Wang, Xubo Lin, Yang Deng, Zezhii Deng","cross_cats":[],"headline":"A dual hierarchical reinforcement learning method lets conversational agents proactively extract information by coordinating high-level strategy and low-level question generation in legal dialogues.","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2026-05-13T19:29:11Z","title":"Dual Hierarchical Dialogue Policy Learning for Legal Inquisitive Conversational Agents"},"references":{"count":206,"internal_anchors":21,"resolved_work":206,"sample":[{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":1,"title":"and Shen, Yelong and Wallis, Phillip and Allen-Zhu, Zeyuan and Li, Yuanzhi and Wang, Shean and Chen, Weizhu , booktitle =","work_id":"5986ff2f-00dc-4b1b-92e8-9c8e22620c26","year":2022},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":2,"title":"Proceedings of the 17th Annual Meeting of the Special Interest Group on Discourse and Dialogue , year =","work_id":"6e9ebdfd-8c57-44f0-86ba-ff95d263587b","year":null},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":3,"title":"Multiwoz–a large-scale multi-domain wizard-of-oz dataset for task-oriented dialogue modelling","work_id":"9d20f47a-38c7-4f68-ae4c-ce63d559a7b2","year":null},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":4,"title":"Proceedings of the 15th Annual Meeting of the Special Interest Group on Discourse and Dialogue , year =","work_id":"9305a5df-dd04-4eaa-a5c5-40272e136356","year":null},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":5,"title":"Proceedings of the 2016 Conference on Empirical Methods in Natural Language Processing (EMNLP) , year =","work_id":"4c5cc07e-0e81-4b07-9274-5611697a3657","year":2016}],"snapshot_sha256":"e9be163bad0b2e17623ef08b7ec1d8b8ace24d03768785973792efc52d0db89d"},"source":{"id":"2605.14057","kind":"arxiv","version":1},"verdict":{"created_at":"2026-05-15T05:17:18.907754Z","id":"16196cd9-fdcc-41ba-838d-91ef57de8311","model_set":{"reader":"grok-4.3"},"one_line_summary":"A dual hierarchical RL framework lets agents learn when and how to ask probing questions in U.S. Supreme Court arguments, outperforming baselines on a court dataset.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"A dual hierarchical reinforcement learning method lets conversational agents proactively extract information by coordinating high-level strategy and low-level question generation in legal dialogues.","strongest_claim":"Evaluations on a U.S. Supreme Court dataset show that our method outperforms various baselines across multiple metrics.","weakest_assumption":"The U.S. Supreme Court dataset captures representative judicial questioning patterns and that the dual RL agents can learn effective strategies aligned with legal objectives without additional human feedback or validation."}},"verdict_id":"16196cd9-fdcc-41ba-838d-91ef57de8311"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:5ca65c788422b034132c7d7afbdc49a22380143f5e25b32e2116ec955e28c364","target":"record","created_at":"2026-05-17T23:39:12Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"888d9dabcc3bda53c71fc2b98d89ea7e1f39f70a1fd7380d95bbcfa936e07d9e","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2026-05-13T19:29:11Z","title_canon_sha256":"9b56ec121339d94c129fa3c84b0c6d81d58b1987b84b25379563b9d2a64b3eae"},"schema_version":"1.0","source":{"id":"2605.14057","kind":"arxiv","version":1}},"canonical_sha256":"82e4cffaac0754f8993964cf25a69448621e04f30cee1cd7c1e06e9d5f3ecc51","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"82e4cffaac0754f8993964cf25a69448621e04f30cee1cd7c1e06e9d5f3ecc51","first_computed_at":"2026-05-17T23:39:12.573495Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:39:12.573495Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"lRZQMrNrUl9R1rIm5Uw3bsh80zR3RvMbzbjsAaloheNci/6Y3qDoIlNi/xqlblA900gYpP1YEV1hmCaaB52uAQ==","signature_status":"signed_v1","signed_at":"2026-05-17T23:39:12.574169Z","signed_message":"canonical_sha256_bytes"},"source_id":"2605.14057","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:5ca65c788422b034132c7d7afbdc49a22380143f5e25b32e2116ec955e28c364","sha256:31f35ec907812a01ac96bce5679aa3e804a805b0d469716563360ab259c147d7","sha256:5f01c5ca0da5304ca79b60b56194a57ea1f5c938d4403b3ec84e5f7abc1f1b78"],"state_sha256":"fc818042c63a33d7a5b5a731ba676d5b07e733e20ffca87d23f781e2387f0c38"}