{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2018:TTBDHNZO4LB4EAWFFRYSHJE2CH","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"7ecd29b472110aa4c327a6083920b1ffb679f8311a03777f6de42a39ea8d92b5","cross_cats_sorted":["cs.AI","cs.RO"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-02-28T18:58:22Z","title_canon_sha256":"909422759859d9a24055c640faa6d06ed6f72f11d31e644821af6ef7e61b69f9"},"schema_version":"1.0","source":{"id":"1802.10592","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1802.10592","created_at":"2026-05-18T00:04:02Z"},{"alias_kind":"arxiv_version","alias_value":"1802.10592v2","created_at":"2026-05-18T00:04:02Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1802.10592","created_at":"2026-05-18T00:04:02Z"},{"alias_kind":"pith_short_12","alias_value":"TTBDHNZO4LB4","created_at":"2026-05-18T12:32:56Z"},{"alias_kind":"pith_short_16","alias_value":"TTBDHNZO4LB4EAWF","created_at":"2026-05-18T12:32:56Z"},{"alias_kind":"pith_short_8","alias_value":"TTBDHNZO","created_at":"2026-05-18T12:32:56Z"}],"graph_snapshots":[{"event_id":"sha256:5af46a2499cb86e5786899936dc3c25cf26ca107a5157271db982284070f0885","target":"graph","created_at":"2026-05-18T00:04:02Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Model-free reinforcement learning (RL) methods are succeeding in a growing number of tasks, aided by recent advances in deep learning. However, they tend to suffer from high sample complexity, which hinders their use in real-world domains. Alternatively, model-based reinforcement learning promises to reduce sample complexity, but tends to require careful tuning and to date have succeeded mainly in restrictive domains where simple models are sufficient for learning. In this paper, we analyze the behavior of vanilla model-based reinforcement learning methods when deep neural networks are used to","authors_text":"Aviv Tamar, Ignasi Clavera, Pieter Abbeel, Thanard Kurutach, Yan Duan","cross_cats":["cs.AI","cs.RO"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-02-28T18:58:22Z","title":"Model-Ensemble Trust-Region Policy Optimization"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1802.10592","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:50094ec03a9ffdbf98e76d37f479cffeac2e0cc2121ee986bba60d013b2585bf","target":"record","created_at":"2026-05-18T00:04:02Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"7ecd29b472110aa4c327a6083920b1ffb679f8311a03777f6de42a39ea8d92b5","cross_cats_sorted":["cs.AI","cs.RO"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-02-28T18:58:22Z","title_canon_sha256":"909422759859d9a24055c640faa6d06ed6f72f11d31e644821af6ef7e61b69f9"},"schema_version":"1.0","source":{"id":"1802.10592","kind":"arxiv","version":2}},"canonical_sha256":"9cc233b72ee2c3c202c52c7123a49a11dff2985f075037b6357859d79f3b5c6a","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"9cc233b72ee2c3c202c52c7123a49a11dff2985f075037b6357859d79f3b5c6a","first_computed_at":"2026-05-18T00:04:02.727614Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:04:02.727614Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"q5xbzm3CjYVvnix763xxBRdgaSDWJeqJovOrbsZ6LLy3lxfkn4uHEFxx202weMSK5af524v5Hh5hysVgnEcCAA==","signature_status":"signed_v1","signed_at":"2026-05-18T00:04:02.728270Z","signed_message":"canonical_sha256_bytes"},"source_id":"1802.10592","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:50094ec03a9ffdbf98e76d37f479cffeac2e0cc2121ee986bba60d013b2585bf","sha256:5af46a2499cb86e5786899936dc3c25cf26ca107a5157271db982284070f0885"],"state_sha256":"f8aca451b6ba8e1a934552e72456baeabc607f3d5af206125a8f2ea083eae554"}