{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:UOPYLJGSNGLGRXPZLFNRFV57HG","short_pith_number":"pith:UOPYLJGS","schema_version":"1.0","canonical_sha256":"a39f85a4d2699668ddf9595b12d7bf39a579fba4d2ce506c808c268285abe875","source":{"kind":"arxiv","id":"2601.21306","version":2},"attestation_state":"computed","paper":{"title":"The Surprising Difficulty of Search in Model-Based Reinforcement Learning","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.LG","authors_text":"Brandon Amos, Gregory Dudek, Mikael Henaff, Scott Fujimoto, Wei-Di Chang","submitted_at":"2026-01-29T05:58:24Z","abstract_excerpt":"This paper investigates search in model-based reinforcement learning (RL). Conventional wisdom holds that long-term predictions and compounding errors are the primary obstacles for model-based RL. We challenge this view, showing that search is not a drop-in replacement for a learned policy. Surprisingly, we find that search can harm performance even when the model is highly accurate. Instead, we show that mitigating overestimation bias matters more than improving model or value function accuracy. Building on this insight, we identify that taking the minimum over an ensemble of value functions "},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2601.21306","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-01-29T05:58:24Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"2a6529945f3b87c74821fef39afdbcfba502a4765be038894f8f3ec7283c6273","abstract_canon_sha256":"de7ba01eafa20c94d369785afdccd9dd72bb393ad7d07af75fc1cfb7d778cd43"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-25T02:01:13.538895Z","signature_b64":"Fm8fcITz+i2iztJBjzOGMfRI3LE50kzpk7Cb+0JHb6e087MxqP+rZOHvzF4yxKxxqxtKAqKs1IgUbv33gf8hCw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"a39f85a4d2699668ddf9595b12d7bf39a579fba4d2ce506c808c268285abe875","last_reissued_at":"2026-05-25T02:01:13.538144Z","signature_status":"signed_v1","first_computed_at":"2026-05-25T02:01:13.538144Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"The Surprising Difficulty of Search in Model-Based Reinforcement Learning","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.LG","authors_text":"Brandon Amos, Gregory Dudek, Mikael Henaff, Scott Fujimoto, Wei-Di Chang","submitted_at":"2026-01-29T05:58:24Z","abstract_excerpt":"This paper investigates search in model-based reinforcement learning (RL). Conventional wisdom holds that long-term predictions and compounding errors are the primary obstacles for model-based RL. We challenge this view, showing that search is not a drop-in replacement for a learned policy. Surprisingly, we find that search can harm performance even when the model is highly accurate. Instead, we show that mitigating overestimation bias matters more than improving model or value function accuracy. Building on this insight, we identify that taking the minimum over an ensemble of value functions "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2601.21306","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2601.21306/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2601.21306","created_at":"2026-05-25T02:01:13.538264+00:00"},{"alias_kind":"arxiv_version","alias_value":"2601.21306v2","created_at":"2026-05-25T02:01:13.538264+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2601.21306","created_at":"2026-05-25T02:01:13.538264+00:00"},{"alias_kind":"pith_short_12","alias_value":"UOPYLJGSNGLG","created_at":"2026-05-25T02:01:13.538264+00:00"},{"alias_kind":"pith_short_16","alias_value":"UOPYLJGSNGLGRXPZ","created_at":"2026-05-25T02:01:13.538264+00:00"},{"alias_kind":"pith_short_8","alias_value":"UOPYLJGS","created_at":"2026-05-25T02:01:13.538264+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/UOPYLJGSNGLGRXPZLFNRFV57HG","json":"https://pith.science/pith/UOPYLJGSNGLGRXPZLFNRFV57HG.json","graph_json":"https://pith.science/api/pith-number/UOPYLJGSNGLGRXPZLFNRFV57HG/graph.json","events_json":"https://pith.science/api/pith-number/UOPYLJGSNGLGRXPZLFNRFV57HG/events.json","paper":"https://pith.science/paper/UOPYLJGS"},"agent_actions":{"view_html":"https://pith.science/pith/UOPYLJGSNGLGRXPZLFNRFV57HG","download_json":"https://pith.science/pith/UOPYLJGSNGLGRXPZLFNRFV57HG.json","view_paper":"https://pith.science/paper/UOPYLJGS","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2601.21306&json=true","fetch_graph":"https://pith.science/api/pith-number/UOPYLJGSNGLGRXPZLFNRFV57HG/graph.json","fetch_events":"https://pith.science/api/pith-number/UOPYLJGSNGLGRXPZLFNRFV57HG/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/UOPYLJGSNGLGRXPZLFNRFV57HG/action/timestamp_anchor","attest_storage":"https://pith.science/pith/UOPYLJGSNGLGRXPZLFNRFV57HG/action/storage_attestation","attest_author":"https://pith.science/pith/UOPYLJGSNGLGRXPZLFNRFV57HG/action/author_attestation","sign_citation":"https://pith.science/pith/UOPYLJGSNGLGRXPZLFNRFV57HG/action/citation_signature","submit_replication":"https://pith.science/pith/UOPYLJGSNGLGRXPZLFNRFV57HG/action/replication_record"}},"created_at":"2026-05-25T02:01:13.538264+00:00","updated_at":"2026-05-25T02:01:13.538264+00:00"}