{"work":{"id":"399c3bf3-740c-41a8-bb6b-dfe1ea43e56d","openalex_id":null,"doi":null,"arxiv_id":"1910.01708","raw_key":null,"title":"Benchmarking Batch Deep Reinforcement Learning Algorithms","authors":null,"authors_text":"Scott Fujimoto, Edoardo Conti, Mohammad Ghavamzadeh, Joelle Pineau","year":2019,"venue":"cs.LG","abstract":"Widely-used deep reinforcement learning algorithms have been shown to fail in the batch setting--learning from a fixed data set without interaction with the environment. Following this result, there have been several papers showing reasonable performances under a variety of environments and batch settings. In this paper, we benchmark the performance of recent off-policy and batch reinforcement learning algorithms under unified settings on the Atari domain, with data generated by a single partially-trained behavioral policy. We find that under these conditions, many of these algorithms underperform DQN trained online with the same amount of data, as well as the partially-trained behavioral policy. To introduce a strong baseline, we adapt the Batch-Constrained Q-learning algorithm to a discrete-action setting, and show it outperforms all existing algorithms at this task.","external_url":"https://arxiv.org/abs/1910.01708","cited_by_count":null,"metadata_source":"pith","metadata_fetched_at":"2026-05-24T03:43:50.364157+00:00","pith_arxiv_id":"1910.01708","created_at":"2026-05-11T04:06:00.086417+00:00","updated_at":"2026-05-24T03:43:50.364157+00:00","title_quality_ok":true,"display_title":"Benchmarking Batch Deep Reinforcement Learning Algorithms","render_title":"Benchmarking Batch Deep Reinforcement Learning Algorithms"},"hub":{"state":{"work_id":"399c3bf3-740c-41a8-bb6b-dfe1ea43e56d","tier":"hub","tier_reason":"10+ Pith inbound or 1,000+ external citations","pith_inbound_count":21,"external_cited_by_count":null,"distinct_field_count":4,"first_pith_cited_at":"2024-02-07T21:58:40+00:00","last_pith_cited_at":"2026-05-19T07:09:32+00:00","author_build_status":"not_needed","summary_status":"needed","contexts_status":"needed","graph_status":"needed","ask_index_status":"not_needed","reader_status":"not_needed","recognition_status":"not_needed","updated_at":"2026-06-01T04:33:07.145783+00:00","tier_text":"hub"},"tier":"hub","role_counts":[{"context_role":"background","n":4},{"context_role":"dataset","n":1},{"context_role":"method","n":1}],"polarity_counts":[{"context_polarity":"background","n":4},{"context_polarity":"use_dataset","n":1},{"context_polarity":"use_method","n":1}],"runs":{},"summary":{},"graph":{},"authors":[]}}