{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2018:6MK4WV36PPBOG53VDXJBWMC65H","short_pith_number":"pith:6MK4WV36","canonical_record":{"source":{"id":"1812.07544","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-12-18T18:20:49Z","cross_cats_sorted":["cs.AI","stat.ML"],"title_canon_sha256":"fd9d930c77eb3ea7b87c827e4c69d423c4e247b8df35eb4979481b63fcd760c5","abstract_canon_sha256":"456644b5ecb1871e0b453fd39fca58085b7968c3fb3aa3fcac9e134046da95f2"},"schema_version":"1.0"},"canonical_sha256":"f315cb577e7bc2e377751dd21b305ee9db5d0ecfb24b88e1dcdec62afa9b98f3","source":{"kind":"arxiv","id":"1812.07544","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1812.07544","created_at":"2026-05-17T23:50:33Z"},{"alias_kind":"arxiv_version","alias_value":"1812.07544v2","created_at":"2026-05-17T23:50:33Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1812.07544","created_at":"2026-05-17T23:50:33Z"},{"alias_kind":"pith_short_12","alias_value":"6MK4WV36PPBO","created_at":"2026-05-18T12:32:11Z"},{"alias_kind":"pith_short_16","alias_value":"6MK4WV36PPBOG53V","created_at":"2026-05-18T12:32:11Z"},{"alias_kind":"pith_short_8","alias_value":"6MK4WV36","created_at":"2026-05-18T12:32:11Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2018:6MK4WV36PPBOG53VDXJBWMC65H","target":"record","payload":{"canonical_record":{"source":{"id":"1812.07544","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-12-18T18:20:49Z","cross_cats_sorted":["cs.AI","stat.ML"],"title_canon_sha256":"fd9d930c77eb3ea7b87c827e4c69d423c4e247b8df35eb4979481b63fcd760c5","abstract_canon_sha256":"456644b5ecb1871e0b453fd39fca58085b7968c3fb3aa3fcac9e134046da95f2"},"schema_version":"1.0"},"canonical_sha256":"f315cb577e7bc2e377751dd21b305ee9db5d0ecfb24b88e1dcdec62afa9b98f3","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:50:33.462416Z","signature_b64":"RluC1CJqjiVolBdVa8OpC8QdVwvsPTm7ACoy337PmcJQRYyM9+DBDEyn2HQdNQE2WYN48BEM54w0106RX2fmAw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"f315cb577e7bc2e377751dd21b305ee9db5d0ecfb24b88e1dcdec62afa9b98f3","last_reissued_at":"2026-05-17T23:50:33.461730Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:50:33.461730Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1812.07544","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:50:33Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"T/FTZk8AHaq2CkP+v6FI3LsSEr1vnMpdFXup6CwgfpdKPt5ekUIKHoBxncNdvRG7cJuxNXN3Mpi5pYU+VcdNCQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-20T08:22:38.476053Z"},"content_sha256":"47345f00fcc3282793e0ee1f2a38a22e2e64fd9ee58521ca8fb75c0ad30b5e3e","schema_version":"1.0","event_id":"sha256:47345f00fcc3282793e0ee1f2a38a22e2e64fd9ee58521ca8fb75c0ad30b5e3e"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2018:6MK4WV36PPBOG53VDXJBWMC65H","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Information-Directed Exploration for Deep Reinforcement Learning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","stat.ML"],"primary_cat":"cs.LG","authors_text":"Andreas Krause, Felix Berkenkamp, Johannes Kirschner, Nikolay Nikolov","submitted_at":"2018-12-18T18:20:49Z","abstract_excerpt":"Efficient exploration remains a major challenge for reinforcement learning. One reason is that the variability of the returns often depends on the current state and action, and is therefore heteroscedastic. Classical exploration strategies such as upper confidence bound algorithms and Thompson sampling fail to appropriately account for heteroscedasticity, even in the bandit setting. Motivated by recent findings that address this issue in bandits, we propose to use Information-Directed Sampling (IDS) for exploration in reinforcement learning. As our main contribution, we build on recent advance"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1812.07544","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:50:33Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"56qpOmHHTI+UIFwhTbP19nuXV8y4/ezWSRe13Zq0z2L2zJZABYH58kr+nNarAnasyH6lipQELSpPKWLDSQN3Dw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-20T08:22:38.476440Z"},"content_sha256":"5b6033bf528fe7a6006bce61279fabc704e2e5b64e69acb1a4c244173c21bcc6","schema_version":"1.0","event_id":"sha256:5b6033bf528fe7a6006bce61279fabc704e2e5b64e69acb1a4c244173c21bcc6"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/6MK4WV36PPBOG53VDXJBWMC65H/bundle.json","state_url":"https://pith.science/pith/6MK4WV36PPBOG53VDXJBWMC65H/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/6MK4WV36PPBOG53VDXJBWMC65H/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-20T08:22:38Z","links":{"resolver":"https://pith.science/pith/6MK4WV36PPBOG53VDXJBWMC65H","bundle":"https://pith.science/pith/6MK4WV36PPBOG53VDXJBWMC65H/bundle.json","state":"https://pith.science/pith/6MK4WV36PPBOG53VDXJBWMC65H/state.json","well_known_bundle":"https://pith.science/.well-known/pith/6MK4WV36PPBOG53VDXJBWMC65H/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2018:6MK4WV36PPBOG53VDXJBWMC65H","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"456644b5ecb1871e0b453fd39fca58085b7968c3fb3aa3fcac9e134046da95f2","cross_cats_sorted":["cs.AI","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-12-18T18:20:49Z","title_canon_sha256":"fd9d930c77eb3ea7b87c827e4c69d423c4e247b8df35eb4979481b63fcd760c5"},"schema_version":"1.0","source":{"id":"1812.07544","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1812.07544","created_at":"2026-05-17T23:50:33Z"},{"alias_kind":"arxiv_version","alias_value":"1812.07544v2","created_at":"2026-05-17T23:50:33Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1812.07544","created_at":"2026-05-17T23:50:33Z"},{"alias_kind":"pith_short_12","alias_value":"6MK4WV36PPBO","created_at":"2026-05-18T12:32:11Z"},{"alias_kind":"pith_short_16","alias_value":"6MK4WV36PPBOG53V","created_at":"2026-05-18T12:32:11Z"},{"alias_kind":"pith_short_8","alias_value":"6MK4WV36","created_at":"2026-05-18T12:32:11Z"}],"graph_snapshots":[{"event_id":"sha256:5b6033bf528fe7a6006bce61279fabc704e2e5b64e69acb1a4c244173c21bcc6","target":"graph","created_at":"2026-05-17T23:50:33Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Efficient exploration remains a major challenge for reinforcement learning. One reason is that the variability of the returns often depends on the current state and action, and is therefore heteroscedastic. Classical exploration strategies such as upper confidence bound algorithms and Thompson sampling fail to appropriately account for heteroscedasticity, even in the bandit setting. Motivated by recent findings that address this issue in bandits, we propose to use Information-Directed Sampling (IDS) for exploration in reinforcement learning. As our main contribution, we build on recent advance","authors_text":"Andreas Krause, Felix Berkenkamp, Johannes Kirschner, Nikolay Nikolov","cross_cats":["cs.AI","stat.ML"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-12-18T18:20:49Z","title":"Information-Directed Exploration for Deep Reinforcement Learning"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1812.07544","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:47345f00fcc3282793e0ee1f2a38a22e2e64fd9ee58521ca8fb75c0ad30b5e3e","target":"record","created_at":"2026-05-17T23:50:33Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"456644b5ecb1871e0b453fd39fca58085b7968c3fb3aa3fcac9e134046da95f2","cross_cats_sorted":["cs.AI","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-12-18T18:20:49Z","title_canon_sha256":"fd9d930c77eb3ea7b87c827e4c69d423c4e247b8df35eb4979481b63fcd760c5"},"schema_version":"1.0","source":{"id":"1812.07544","kind":"arxiv","version":2}},"canonical_sha256":"f315cb577e7bc2e377751dd21b305ee9db5d0ecfb24b88e1dcdec62afa9b98f3","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"f315cb577e7bc2e377751dd21b305ee9db5d0ecfb24b88e1dcdec62afa9b98f3","first_computed_at":"2026-05-17T23:50:33.461730Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:50:33.461730Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"RluC1CJqjiVolBdVa8OpC8QdVwvsPTm7ACoy337PmcJQRYyM9+DBDEyn2HQdNQE2WYN48BEM54w0106RX2fmAw==","signature_status":"signed_v1","signed_at":"2026-05-17T23:50:33.462416Z","signed_message":"canonical_sha256_bytes"},"source_id":"1812.07544","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:47345f00fcc3282793e0ee1f2a38a22e2e64fd9ee58521ca8fb75c0ad30b5e3e","sha256:5b6033bf528fe7a6006bce61279fabc704e2e5b64e69acb1a4c244173c21bcc6"],"state_sha256":"024e93970d114105fdf6f7f6ee597600b2a4b85dcc2fd12e16d6ee524ccf70c0"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"+l5OLFc1zI4oPEsus5VH1VYgyt4YKRlxdJigr7mRXz8nJ+biwOMhICM7AxQtIqL8jAavSncpqfHi1gSQc9qpAQ==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-20T08:22:38.478516Z","bundle_sha256":"98499cf5984c486e228748db72f70fb21622cf2bba370ef087d813faa896691b"}}