{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2018:TTBDHNZO4LB4EAWFFRYSHJE2CH","short_pith_number":"pith:TTBDHNZO","schema_version":"1.0","canonical_sha256":"9cc233b72ee2c3c202c52c7123a49a11dff2985f075037b6357859d79f3b5c6a","source":{"kind":"arxiv","id":"1802.10592","version":2},"attestation_state":"computed","paper":{"title":"Model-Ensemble Trust-Region Policy Optimization","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","cs.RO"],"primary_cat":"cs.LG","authors_text":"Aviv Tamar, Ignasi Clavera, Pieter Abbeel, Thanard Kurutach, Yan Duan","submitted_at":"2018-02-28T18:58:22Z","abstract_excerpt":"Model-free reinforcement learning (RL) methods are succeeding in a growing number of tasks, aided by recent advances in deep learning. However, they tend to suffer from high sample complexity, which hinders their use in real-world domains. Alternatively, model-based reinforcement learning promises to reduce sample complexity, but tends to require careful tuning and to date have succeeded mainly in restrictive domains where simple models are sufficient for learning. In this paper, we analyze the behavior of vanilla model-based reinforcement learning methods when deep neural networks are used to"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1802.10592","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-02-28T18:58:22Z","cross_cats_sorted":["cs.AI","cs.RO"],"title_canon_sha256":"909422759859d9a24055c640faa6d06ed6f72f11d31e644821af6ef7e61b69f9","abstract_canon_sha256":"7ecd29b472110aa4c327a6083920b1ffb679f8311a03777f6de42a39ea8d92b5"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:04:02.728270Z","signature_b64":"q5xbzm3CjYVvnix763xxBRdgaSDWJeqJovOrbsZ6LLy3lxfkn4uHEFxx202weMSK5af524v5Hh5hysVgnEcCAA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"9cc233b72ee2c3c202c52c7123a49a11dff2985f075037b6357859d79f3b5c6a","last_reissued_at":"2026-05-18T00:04:02.727614Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:04:02.727614Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Model-Ensemble Trust-Region Policy Optimization","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","cs.RO"],"primary_cat":"cs.LG","authors_text":"Aviv Tamar, Ignasi Clavera, Pieter Abbeel, Thanard Kurutach, Yan Duan","submitted_at":"2018-02-28T18:58:22Z","abstract_excerpt":"Model-free reinforcement learning (RL) methods are succeeding in a growing number of tasks, aided by recent advances in deep learning. However, they tend to suffer from high sample complexity, which hinders their use in real-world domains. Alternatively, model-based reinforcement learning promises to reduce sample complexity, but tends to require careful tuning and to date have succeeded mainly in restrictive domains where simple models are sufficient for learning. In this paper, we analyze the behavior of vanilla model-based reinforcement learning methods when deep neural networks are used to"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1802.10592","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1802.10592","created_at":"2026-05-18T00:04:02.727730+00:00"},{"alias_kind":"arxiv_version","alias_value":"1802.10592v2","created_at":"2026-05-18T00:04:02.727730+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1802.10592","created_at":"2026-05-18T00:04:02.727730+00:00"},{"alias_kind":"pith_short_12","alias_value":"TTBDHNZO4LB4","created_at":"2026-05-18T12:32:56.356000+00:00"},{"alias_kind":"pith_short_16","alias_value":"TTBDHNZO4LB4EAWF","created_at":"2026-05-18T12:32:56.356000+00:00"},{"alias_kind":"pith_short_8","alias_value":"TTBDHNZO","created_at":"2026-05-18T12:32:56.356000+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":9,"internal_anchor_count":5,"sample":[{"citing_arxiv_id":"1906.08312","citing_title":"Calibrated Model-Based Deep Reinforcement Learning","ref_index":27,"is_internal_anchor":true},{"citing_arxiv_id":"1906.08649","citing_title":"Exploring Model-based Planning with Policy Networks","ref_index":20,"is_internal_anchor":true},{"citing_arxiv_id":"1906.10717","citing_title":"Uncertainty-aware Model-based Policy Optimization","ref_index":13,"is_internal_anchor":true},{"citing_arxiv_id":"1907.02057","citing_title":"Benchmarking Model-Based Reinforcement Learning","ref_index":27,"is_internal_anchor":true},{"citing_arxiv_id":"2010.02193","citing_title":"Mastering Atari with Discrete World Models","ref_index":35,"is_internal_anchor":true},{"citing_arxiv_id":"2604.27411","citing_title":"Detecting is Easy, Adapting is Hard: Local Expert Growth for Visual Model-Based Reinforcement Learning under Distribution Shift","ref_index":16,"is_internal_anchor":false},{"citing_arxiv_id":"2605.09808","citing_title":"Quantifying the Utility of User Simulators for Building Collaborative LLM Assistants","ref_index":98,"is_internal_anchor":false},{"citing_arxiv_id":"1912.01603","citing_title":"Dream to Control: Learning Behaviors by Latent Imagination","ref_index":30,"is_internal_anchor":false},{"citing_arxiv_id":"2604.09035","citing_title":"Advantage-Guided Diffusion for Model-Based Reinforcement Learning","ref_index":6,"is_internal_anchor":false}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/TTBDHNZO4LB4EAWFFRYSHJE2CH","json":"https://pith.science/pith/TTBDHNZO4LB4EAWFFRYSHJE2CH.json","graph_json":"https://pith.science/api/pith-number/TTBDHNZO4LB4EAWFFRYSHJE2CH/graph.json","events_json":"https://pith.science/api/pith-number/TTBDHNZO4LB4EAWFFRYSHJE2CH/events.json","paper":"https://pith.science/paper/TTBDHNZO"},"agent_actions":{"view_html":"https://pith.science/pith/TTBDHNZO4LB4EAWFFRYSHJE2CH","download_json":"https://pith.science/pith/TTBDHNZO4LB4EAWFFRYSHJE2CH.json","view_paper":"https://pith.science/paper/TTBDHNZO","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1802.10592&json=true","fetch_graph":"https://pith.science/api/pith-number/TTBDHNZO4LB4EAWFFRYSHJE2CH/graph.json","fetch_events":"https://pith.science/api/pith-number/TTBDHNZO4LB4EAWFFRYSHJE2CH/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/TTBDHNZO4LB4EAWFFRYSHJE2CH/action/timestamp_anchor","attest_storage":"https://pith.science/pith/TTBDHNZO4LB4EAWFFRYSHJE2CH/action/storage_attestation","attest_author":"https://pith.science/pith/TTBDHNZO4LB4EAWFFRYSHJE2CH/action/author_attestation","sign_citation":"https://pith.science/pith/TTBDHNZO4LB4EAWFFRYSHJE2CH/action/citation_signature","submit_replication":"https://pith.science/pith/TTBDHNZO4LB4EAWFFRYSHJE2CH/action/replication_record"}},"created_at":"2026-05-18T00:04:02.727730+00:00","updated_at":"2026-05-18T00:04:02.727730+00:00"}