{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2019:VJFBNGEDPSVELXXWMMRYWKSTIH","short_pith_number":"pith:VJFBNGED","schema_version":"1.0","canonical_sha256":"aa4a1698837caa45def663238b2a5341f3290d1794a246e2c4ad5b04d7a9c5c3","source":{"kind":"arxiv","id":"1902.03657","version":1},"attestation_state":"computed","paper":{"title":"A Bandit Framework for Optimal Selection of Reinforcement Learning Agents","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["stat.ML"],"primary_cat":"cs.LG","authors_text":"Abdul-Saboor Sheikh, Andreas Merentitis, Kashif Rasul, Roland Vollgraf, Urs Bergmann","submitted_at":"2019-02-10T19:39:28Z","abstract_excerpt":"Deep Reinforcement Learning has been shown to be very successful in complex games, e.g. Atari or Go. These games have clearly defined rules, and hence allow simulation. In many practical applications, however, interactions with the environment are costly and a good simulator of the environment is not available. Further, as environments differ by application, the optimal inductive bias (architecture, hyperparameters, etc.) of a reinforcement agent depends on the application. In this work, we propose a multi-arm bandit framework that selects from a set of different reinforcement learning agents "},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1902.03657","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-02-10T19:39:28Z","cross_cats_sorted":["stat.ML"],"title_canon_sha256":"7d02f27ee013b1e6db1112585f841c562a25e07dcdbf7bcc743203fba0269b26","abstract_canon_sha256":"acf4ccc2cdce1a91907ed09e6dff3359f703bd51cdea16299ffd724f32437e46"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:54:20.032661Z","signature_b64":"vL5uiNIgVxbEluTUzYrPX/yem+ulnZzP0GjE5MQvGax3QPCoYs+enXRYR4yTcVyUFTTgRf+BEnnPGX1MS/tEDA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"aa4a1698837caa45def663238b2a5341f3290d1794a246e2c4ad5b04d7a9c5c3","last_reissued_at":"2026-05-17T23:54:20.032188Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:54:20.032188Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"A Bandit Framework for Optimal Selection of Reinforcement Learning Agents","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["stat.ML"],"primary_cat":"cs.LG","authors_text":"Abdul-Saboor Sheikh, Andreas Merentitis, Kashif Rasul, Roland Vollgraf, Urs Bergmann","submitted_at":"2019-02-10T19:39:28Z","abstract_excerpt":"Deep Reinforcement Learning has been shown to be very successful in complex games, e.g. Atari or Go. These games have clearly defined rules, and hence allow simulation. In many practical applications, however, interactions with the environment are costly and a good simulator of the environment is not available. Further, as environments differ by application, the optimal inductive bias (architecture, hyperparameters, etc.) of a reinforcement agent depends on the application. In this work, we propose a multi-arm bandit framework that selects from a set of different reinforcement learning agents "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1902.03657","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1902.03657","created_at":"2026-05-17T23:54:20.032256+00:00"},{"alias_kind":"arxiv_version","alias_value":"1902.03657v1","created_at":"2026-05-17T23:54:20.032256+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1902.03657","created_at":"2026-05-17T23:54:20.032256+00:00"},{"alias_kind":"pith_short_12","alias_value":"VJFBNGEDPSVE","created_at":"2026-05-18T12:33:30.264802+00:00"},{"alias_kind":"pith_short_16","alias_value":"VJFBNGEDPSVELXXW","created_at":"2026-05-18T12:33:30.264802+00:00"},{"alias_kind":"pith_short_8","alias_value":"VJFBNGED","created_at":"2026-05-18T12:33:30.264802+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/VJFBNGEDPSVELXXWMMRYWKSTIH","json":"https://pith.science/pith/VJFBNGEDPSVELXXWMMRYWKSTIH.json","graph_json":"https://pith.science/api/pith-number/VJFBNGEDPSVELXXWMMRYWKSTIH/graph.json","events_json":"https://pith.science/api/pith-number/VJFBNGEDPSVELXXWMMRYWKSTIH/events.json","paper":"https://pith.science/paper/VJFBNGED"},"agent_actions":{"view_html":"https://pith.science/pith/VJFBNGEDPSVELXXWMMRYWKSTIH","download_json":"https://pith.science/pith/VJFBNGEDPSVELXXWMMRYWKSTIH.json","view_paper":"https://pith.science/paper/VJFBNGED","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1902.03657&json=true","fetch_graph":"https://pith.science/api/pith-number/VJFBNGEDPSVELXXWMMRYWKSTIH/graph.json","fetch_events":"https://pith.science/api/pith-number/VJFBNGEDPSVELXXWMMRYWKSTIH/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/VJFBNGEDPSVELXXWMMRYWKSTIH/action/timestamp_anchor","attest_storage":"https://pith.science/pith/VJFBNGEDPSVELXXWMMRYWKSTIH/action/storage_attestation","attest_author":"https://pith.science/pith/VJFBNGEDPSVELXXWMMRYWKSTIH/action/author_attestation","sign_citation":"https://pith.science/pith/VJFBNGEDPSVELXXWMMRYWKSTIH/action/citation_signature","submit_replication":"https://pith.science/pith/VJFBNGEDPSVELXXWMMRYWKSTIH/action/replication_record"}},"created_at":"2026-05-17T23:54:20.032256+00:00","updated_at":"2026-05-17T23:54:20.032256+00:00"}