{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2019:6JQD7644XZWEKEVUVIHY35GOKA","short_pith_number":"pith:6JQD7644","canonical_record":{"source":{"id":"1906.07805","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-06-18T20:44:07Z","cross_cats_sorted":["cs.AI","stat.ML"],"title_canon_sha256":"2231384f0b51922398ca5024a88ee351bd14f64d93b18ae6ffffe7029d846bee","abstract_canon_sha256":"314458a7a5d2c3520ffb2663f2fc82dcaa7c23fface436bf3d8e90b75f42db8b"},"schema_version":"1.0"},"canonical_sha256":"f2603ffb9cbe6c4512b4aa0f8df4ce50142ba5160d948fce677fdf6afa806d48","source":{"kind":"arxiv","id":"1906.07805","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1906.07805","created_at":"2026-05-17T23:42:57Z"},{"alias_kind":"arxiv_version","alias_value":"1906.07805v1","created_at":"2026-05-17T23:42:57Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1906.07805","created_at":"2026-05-17T23:42:57Z"},{"alias_kind":"pith_short_12","alias_value":"6JQD7644XZWE","created_at":"2026-05-18T12:33:10Z"},{"alias_kind":"pith_short_16","alias_value":"6JQD7644XZWEKEVU","created_at":"2026-05-18T12:33:10Z"},{"alias_kind":"pith_short_8","alias_value":"6JQD7644","created_at":"2026-05-18T12:33:10Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2019:6JQD7644XZWEKEVUVIHY35GOKA","target":"record","payload":{"canonical_record":{"source":{"id":"1906.07805","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-06-18T20:44:07Z","cross_cats_sorted":["cs.AI","stat.ML"],"title_canon_sha256":"2231384f0b51922398ca5024a88ee351bd14f64d93b18ae6ffffe7029d846bee","abstract_canon_sha256":"314458a7a5d2c3520ffb2663f2fc82dcaa7c23fface436bf3d8e90b75f42db8b"},"schema_version":"1.0"},"canonical_sha256":"f2603ffb9cbe6c4512b4aa0f8df4ce50142ba5160d948fce677fdf6afa806d48","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:42:57.705960Z","signature_b64":"13jdKxPo3SQX5q2LpoE5OfwZiAB7IGXsPCPWHBLxXr1n53gCWvsgtAOR6LqCcofW5JCFPX/Zfr7NLsZuPiHwCA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"f2603ffb9cbe6c4512b4aa0f8df4ce50142ba5160d948fce677fdf6afa806d48","last_reissued_at":"2026-05-17T23:42:57.705326Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:42:57.705326Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1906.07805","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:42:57Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"YyjZVSP81keASirVOWPJsJzuBKo/0iz+kh1P32KzUhCADMjQNBUKBeR0QW2pjFuT/amed2Vrh0qBWJdQuSMTDg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-02T12:48:42.490154Z"},"content_sha256":"dbaa88f93c6add88b22053b624d648b24456e1f9bf2b18c1bbbc0207e893042b","schema_version":"1.0","event_id":"sha256:dbaa88f93c6add88b22053b624d648b24456e1f9bf2b18c1bbbc0207e893042b"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2019:6JQD7644XZWEKEVUVIHY35GOKA","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Directed Exploration for Reinforcement Learning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","stat.ML"],"primary_cat":"cs.LG","authors_text":"Emma Brunskill, Zhaohan Daniel Guo","submitted_at":"2019-06-18T20:44:07Z","abstract_excerpt":"Efficient exploration is necessary to achieve good sample efficiency for reinforcement learning in general. From small, tabular settings such as gridworlds to large, continuous and sparse reward settings such as robotic object manipulation tasks, exploration through adding an uncertainty bonus to the reward function has been shown to be effective when the uncertainty is able to accurately drive exploration towards promising states. However reward bonuses can still be inefficient since they are non-stationary, which means that we must wait for function approximators to catch up and converge aga"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1906.07805","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:42:57Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"5V9YdwxdUvlwYgXzELVabiZFerjbnghj+wl0y74pN88P/yAYYn/peP3LogLIQLO5xuHe5+EY1BzTygTEfemKBQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-02T12:48:42.490525Z"},"content_sha256":"76763320b1ac552ef8c80bd6e7be4aaee5790ff0e8f192cba4291201cafde150","schema_version":"1.0","event_id":"sha256:76763320b1ac552ef8c80bd6e7be4aaee5790ff0e8f192cba4291201cafde150"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/6JQD7644XZWEKEVUVIHY35GOKA/bundle.json","state_url":"https://pith.science/pith/6JQD7644XZWEKEVUVIHY35GOKA/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/6JQD7644XZWEKEVUVIHY35GOKA/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-02T12:48:42Z","links":{"resolver":"https://pith.science/pith/6JQD7644XZWEKEVUVIHY35GOKA","bundle":"https://pith.science/pith/6JQD7644XZWEKEVUVIHY35GOKA/bundle.json","state":"https://pith.science/pith/6JQD7644XZWEKEVUVIHY35GOKA/state.json","well_known_bundle":"https://pith.science/.well-known/pith/6JQD7644XZWEKEVUVIHY35GOKA/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2019:6JQD7644XZWEKEVUVIHY35GOKA","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"314458a7a5d2c3520ffb2663f2fc82dcaa7c23fface436bf3d8e90b75f42db8b","cross_cats_sorted":["cs.AI","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-06-18T20:44:07Z","title_canon_sha256":"2231384f0b51922398ca5024a88ee351bd14f64d93b18ae6ffffe7029d846bee"},"schema_version":"1.0","source":{"id":"1906.07805","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1906.07805","created_at":"2026-05-17T23:42:57Z"},{"alias_kind":"arxiv_version","alias_value":"1906.07805v1","created_at":"2026-05-17T23:42:57Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1906.07805","created_at":"2026-05-17T23:42:57Z"},{"alias_kind":"pith_short_12","alias_value":"6JQD7644XZWE","created_at":"2026-05-18T12:33:10Z"},{"alias_kind":"pith_short_16","alias_value":"6JQD7644XZWEKEVU","created_at":"2026-05-18T12:33:10Z"},{"alias_kind":"pith_short_8","alias_value":"6JQD7644","created_at":"2026-05-18T12:33:10Z"}],"graph_snapshots":[{"event_id":"sha256:76763320b1ac552ef8c80bd6e7be4aaee5790ff0e8f192cba4291201cafde150","target":"graph","created_at":"2026-05-17T23:42:57Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Efficient exploration is necessary to achieve good sample efficiency for reinforcement learning in general. From small, tabular settings such as gridworlds to large, continuous and sparse reward settings such as robotic object manipulation tasks, exploration through adding an uncertainty bonus to the reward function has been shown to be effective when the uncertainty is able to accurately drive exploration towards promising states. However reward bonuses can still be inefficient since they are non-stationary, which means that we must wait for function approximators to catch up and converge aga","authors_text":"Emma Brunskill, Zhaohan Daniel Guo","cross_cats":["cs.AI","stat.ML"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-06-18T20:44:07Z","title":"Directed Exploration for Reinforcement Learning"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1906.07805","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:dbaa88f93c6add88b22053b624d648b24456e1f9bf2b18c1bbbc0207e893042b","target":"record","created_at":"2026-05-17T23:42:57Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"314458a7a5d2c3520ffb2663f2fc82dcaa7c23fface436bf3d8e90b75f42db8b","cross_cats_sorted":["cs.AI","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-06-18T20:44:07Z","title_canon_sha256":"2231384f0b51922398ca5024a88ee351bd14f64d93b18ae6ffffe7029d846bee"},"schema_version":"1.0","source":{"id":"1906.07805","kind":"arxiv","version":1}},"canonical_sha256":"f2603ffb9cbe6c4512b4aa0f8df4ce50142ba5160d948fce677fdf6afa806d48","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"f2603ffb9cbe6c4512b4aa0f8df4ce50142ba5160d948fce677fdf6afa806d48","first_computed_at":"2026-05-17T23:42:57.705326Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:42:57.705326Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"13jdKxPo3SQX5q2LpoE5OfwZiAB7IGXsPCPWHBLxXr1n53gCWvsgtAOR6LqCcofW5JCFPX/Zfr7NLsZuPiHwCA==","signature_status":"signed_v1","signed_at":"2026-05-17T23:42:57.705960Z","signed_message":"canonical_sha256_bytes"},"source_id":"1906.07805","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:dbaa88f93c6add88b22053b624d648b24456e1f9bf2b18c1bbbc0207e893042b","sha256:76763320b1ac552ef8c80bd6e7be4aaee5790ff0e8f192cba4291201cafde150"],"state_sha256":"37cb1850497a1cd0e5739544aac6c628a2fedbdeedb158c9267b36f9d763038b"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"evVwajrgiGh1EY8eDFHKWGHPbuBAG8+Buzw5fVyZlQhlbTaJiUB0qulTWtq/A8adMv8z6bIU/MzC0Pqi5BODBg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-02T12:48:42.492502Z","bundle_sha256":"be0bbfab000e71ee9cb2a56bea126756bc020468e60d93e2b5ade99df9f99e2c"}}