{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2020:WHFOGVALEWYQHEL2RHCQB3V3V2","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"8e094005ddc64a16b58fc19a3dff80594edb8bdecf8fda7b2c18ef1fc28177a3","cross_cats_sorted":["cs.AI","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2020-04-28T17:00:59Z","title_canon_sha256":"90f5f0c1dfa83d0f5b9d02abeee6f4acf97ea2a02de8859c924902ff910f246b"},"schema_version":"1.0","source":{"id":"2004.13657","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2004.13657","created_at":"2026-07-05T00:59:01Z"},{"alias_kind":"arxiv_version","alias_value":"2004.13657v1","created_at":"2026-07-05T00:59:01Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2004.13657","created_at":"2026-07-05T00:59:01Z"},{"alias_kind":"pith_short_12","alias_value":"WHFOGVALEWYQ","created_at":"2026-07-05T00:59:01Z"},{"alias_kind":"pith_short_16","alias_value":"WHFOGVALEWYQHEL2","created_at":"2026-07-05T00:59:01Z"},{"alias_kind":"pith_short_8","alias_value":"WHFOGVAL","created_at":"2026-07-05T00:59:01Z"}],"graph_snapshots":[{"event_id":"sha256:ad5db2a4d593cfaae9e8d830edf3f1ee30e0221cd68d13d99ec4d2133d46e649","target":"graph","created_at":"2026-07-05T00:59:01Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2004.13657/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Human-computer interactive systems that rely on machine learning are becoming paramount to the lives of millions of people who use digital assistants on a daily basis. Yet, further advances are limited by the availability of data and the cost of acquiring new samples. One way to address this problem is by improving the sample efficiency of current approaches. As a solution path, we present a model-based reinforcement learning algorithm for an interactive dialogue task. We build on commonly used actor-critic methods, adding an environment model and planner that augments a learning agent to lear","authors_text":"Graham W. Taylor, Katya Kudashkina, Michael Bowling, Valliappa Chockalingam","cross_cats":["cs.AI","stat.ML"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2020-04-28T17:00:59Z","title":"Sample-Efficient Model-based Actor-Critic for an Interactive Dialogue Task"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2004.13657","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:35ac957d2273fa2e65420ee068aec6a2ed4ad5226ede268a45c5423cd17ca913","target":"record","created_at":"2026-07-05T00:59:01Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"8e094005ddc64a16b58fc19a3dff80594edb8bdecf8fda7b2c18ef1fc28177a3","cross_cats_sorted":["cs.AI","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2020-04-28T17:00:59Z","title_canon_sha256":"90f5f0c1dfa83d0f5b9d02abeee6f4acf97ea2a02de8859c924902ff910f246b"},"schema_version":"1.0","source":{"id":"2004.13657","kind":"arxiv","version":1}},"canonical_sha256":"b1cae3540b25b103917a89c500eebbae931e42703425fa3058ea28dbfacc7c2d","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"b1cae3540b25b103917a89c500eebbae931e42703425fa3058ea28dbfacc7c2d","first_computed_at":"2026-07-05T00:59:01.647604Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-07-05T00:59:01.647604Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"RviyPbugB6/i3N3e7iBN1wlHNg+wiPqjcIgZV0zy+0BQd8trua6I/cqkzZZCPyzGgV3W7Dt5AiuvflZs54H5Cw==","signature_status":"signed_v1","signed_at":"2026-07-05T00:59:01.647986Z","signed_message":"canonical_sha256_bytes"},"source_id":"2004.13657","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:35ac957d2273fa2e65420ee068aec6a2ed4ad5226ede268a45c5423cd17ca913","sha256:ad5db2a4d593cfaae9e8d830edf3f1ee30e0221cd68d13d99ec4d2133d46e649"],"state_sha256":"962b0207ba14e33d86613c3f91f5146479b98846b9f663cf09173efdecb19cda"}