{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2016:WP57N2DFVZ2T2PMFOGMSK3OSQM","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"f912ace2af5f4817d117e4249b30bc7969190d2a5754eafada9440fff44f4983","cross_cats_sorted":["cs.LG"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2016-05-24T21:56:08Z","title_canon_sha256":"f88112cb70859bd777518e090508ec5ba046ef19cade13823314a19ed6acd8cf"},"schema_version":"1.0","source":{"id":"1605.07669","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1605.07669","created_at":"2026-05-18T01:13:04Z"},{"alias_kind":"arxiv_version","alias_value":"1605.07669v2","created_at":"2026-05-18T01:13:04Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1605.07669","created_at":"2026-05-18T01:13:04Z"},{"alias_kind":"pith_short_12","alias_value":"WP57N2DFVZ2T","created_at":"2026-05-18T12:30:51Z"},{"alias_kind":"pith_short_16","alias_value":"WP57N2DFVZ2T2PMF","created_at":"2026-05-18T12:30:51Z"},{"alias_kind":"pith_short_8","alias_value":"WP57N2DF","created_at":"2026-05-18T12:30:51Z"}],"graph_snapshots":[{"event_id":"sha256:d29d7dbe46069e4225ed613f433319925f3ce07af4a7914006e1c7d42daa42dc","target":"graph","created_at":"2026-05-18T01:13:04Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"The ability to compute an accurate reward function is essential for optimising a dialogue policy via reinforcement learning. In real-world applications, using explicit user feedback as the reward signal is often unreliable and costly to collect. This problem can be mitigated if the user's intent is known in advance or data is available to pre-train a task success predictor off-line. In practice neither of these apply for most real world applications. Here we propose an on-line learning framework whereby the dialogue policy is jointly trained alongside the reward model via active learning with ","authors_text":"David Vandyke, Lina Rojas-Barahona, Milica Gasic, Nikola Mrksic, Pei-Hao Su, Stefan Ultes, Steve Young, Tsung-Hsien Wen","cross_cats":["cs.LG"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2016-05-24T21:56:08Z","title":"On-line Active Reward Learning for Policy Optimisation in Spoken Dialogue Systems"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1605.07669","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:8355ad853bb9c472da71122a1a8592ffd51bcb344f176359a203e1ca23334312","target":"record","created_at":"2026-05-18T01:13:04Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"f912ace2af5f4817d117e4249b30bc7969190d2a5754eafada9440fff44f4983","cross_cats_sorted":["cs.LG"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2016-05-24T21:56:08Z","title_canon_sha256":"f88112cb70859bd777518e090508ec5ba046ef19cade13823314a19ed6acd8cf"},"schema_version":"1.0","source":{"id":"1605.07669","kind":"arxiv","version":2}},"canonical_sha256":"b3fbf6e865ae753d3d857199256dd2831708af181c47cf8b17f48269c726490f","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"b3fbf6e865ae753d3d857199256dd2831708af181c47cf8b17f48269c726490f","first_computed_at":"2026-05-18T01:13:04.457674Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T01:13:04.457674Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"VMrL8cPfOzoCtsPqQOzQWO/40JF/8t6fgXKBshsWFuJbEhMoIrPctrzDCSO/rI+wYrbbaA6uw6Rv2C6g13syCg==","signature_status":"signed_v1","signed_at":"2026-05-18T01:13:04.458035Z","signed_message":"canonical_sha256_bytes"},"source_id":"1605.07669","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:8355ad853bb9c472da71122a1a8592ffd51bcb344f176359a203e1ca23334312","sha256:d29d7dbe46069e4225ed613f433319925f3ce07af4a7914006e1c7d42daa42dc"],"state_sha256":"b137e01e587eee45998c9b98fd0a26b6fd2a04a9dccd18c3df1fe6afaa630b64"}