{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2017:ANJMEENCPDEFSO75YSPSDQNLLJ","short_pith_number":"pith:ANJMEENC","canonical_record":{"source":{"id":"1707.00524","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2017-07-03T13:07:40Z","cross_cats_sorted":["cs.AI","stat.ML"],"title_canon_sha256":"8feabd5366200a3806e57b979590ccbe7341187fd1605db755633b9fdac7ba36","abstract_canon_sha256":"bd76b0e5152840aab9d2cd75c9b2012b4dda56956af1481ed49c010d30eab453"},"schema_version":"1.0"},"canonical_sha256":"0352c211a278c8593bfdc49f21c1ab5a73c60e3a050a1daaa68b453bcabd45fe","source":{"kind":"arxiv","id":"1707.00524","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1707.00524","created_at":"2026-05-18T00:17:24Z"},{"alias_kind":"arxiv_version","alias_value":"1707.00524v2","created_at":"2026-05-18T00:17:24Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1707.00524","created_at":"2026-05-18T00:17:24Z"},{"alias_kind":"pith_short_12","alias_value":"ANJMEENCPDEF","created_at":"2026-05-18T12:31:05Z"},{"alias_kind":"pith_short_16","alias_value":"ANJMEENCPDEFSO75","created_at":"2026-05-18T12:31:05Z"},{"alias_kind":"pith_short_8","alias_value":"ANJMEENC","created_at":"2026-05-18T12:31:05Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2017:ANJMEENCPDEFSO75YSPSDQNLLJ","target":"record","payload":{"canonical_record":{"source":{"id":"1707.00524","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2017-07-03T13:07:40Z","cross_cats_sorted":["cs.AI","stat.ML"],"title_canon_sha256":"8feabd5366200a3806e57b979590ccbe7341187fd1605db755633b9fdac7ba36","abstract_canon_sha256":"bd76b0e5152840aab9d2cd75c9b2012b4dda56956af1481ed49c010d30eab453"},"schema_version":"1.0"},"canonical_sha256":"0352c211a278c8593bfdc49f21c1ab5a73c60e3a050a1daaa68b453bcabd45fe","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:17:24.275778Z","signature_b64":"bHLCpFnSoOKDGqjdG+7bXaLoJp63jIA4qwH6Z3AsqrXZHUpFaPQQ82LcWF83iN74vR7MW0OpnSE4qGqGnxG9Cw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"0352c211a278c8593bfdc49f21c1ab5a73c60e3a050a1daaa68b453bcabd45fe","last_reissued_at":"2026-05-18T00:17:24.275058Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:17:24.275058Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1707.00524","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:17:24Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"nV+dO1a/T2TpNs4FAkHZjXPNvme3+5LMrnEYeTI9kf3mQWrjX6rYpWVjONO6eq/20gwUYreZxRBAGg4fC0y0CQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-30T23:06:52.063295Z"},"content_sha256":"f40a4fc318898509d6f8abbff2f833f3982379a6aea3b4db6185fd837c01e78e","schema_version":"1.0","event_id":"sha256:f40a4fc318898509d6f8abbff2f833f3982379a6aea3b4db6185fd837c01e78e"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2017:ANJMEENCPDEFSO75YSPSDQNLLJ","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Hashing over Predicted Future Frames for Informed Exploration of Deep Reinforcement Learning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","stat.ML"],"primary_cat":"cs.LG","authors_text":"Haiyan Yin, Jianda Chen, Sinno Jialin Pan","submitted_at":"2017-07-03T13:07:40Z","abstract_excerpt":"In deep reinforcement learning (RL) tasks, an efficient exploration mechanism should be able to encourage an agent to take actions that lead to less frequent states which may yield higher accumulative future return. However, both knowing about the future and evaluating the frequentness of states are non-trivial tasks, especially for deep RL domains, where a state is represented by high-dimensional image frames. In this paper, we propose a novel informed exploration framework for deep RL, where we build the capability for an RL agent to predict over the future transitions and evaluate the frequ"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1707.00524","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:17:24Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"wy7MDM1ulHGDtNd55+XIU5+7rMiFo0/SZaAhe8cl9VLjX9etUo1o1wdkeEu1MFJTTa40Mzc5jfq99Xm03YtCBg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-30T23:06:52.063719Z"},"content_sha256":"4d2b94a8e7535a1b50612260a7cfc52b86695f4e43c3aeace621b3b4a1b64999","schema_version":"1.0","event_id":"sha256:4d2b94a8e7535a1b50612260a7cfc52b86695f4e43c3aeace621b3b4a1b64999"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/ANJMEENCPDEFSO75YSPSDQNLLJ/bundle.json","state_url":"https://pith.science/pith/ANJMEENCPDEFSO75YSPSDQNLLJ/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/ANJMEENCPDEFSO75YSPSDQNLLJ/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-30T23:06:52Z","links":{"resolver":"https://pith.science/pith/ANJMEENCPDEFSO75YSPSDQNLLJ","bundle":"https://pith.science/pith/ANJMEENCPDEFSO75YSPSDQNLLJ/bundle.json","state":"https://pith.science/pith/ANJMEENCPDEFSO75YSPSDQNLLJ/state.json","well_known_bundle":"https://pith.science/.well-known/pith/ANJMEENCPDEFSO75YSPSDQNLLJ/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2017:ANJMEENCPDEFSO75YSPSDQNLLJ","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"bd76b0e5152840aab9d2cd75c9b2012b4dda56956af1481ed49c010d30eab453","cross_cats_sorted":["cs.AI","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2017-07-03T13:07:40Z","title_canon_sha256":"8feabd5366200a3806e57b979590ccbe7341187fd1605db755633b9fdac7ba36"},"schema_version":"1.0","source":{"id":"1707.00524","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1707.00524","created_at":"2026-05-18T00:17:24Z"},{"alias_kind":"arxiv_version","alias_value":"1707.00524v2","created_at":"2026-05-18T00:17:24Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1707.00524","created_at":"2026-05-18T00:17:24Z"},{"alias_kind":"pith_short_12","alias_value":"ANJMEENCPDEF","created_at":"2026-05-18T12:31:05Z"},{"alias_kind":"pith_short_16","alias_value":"ANJMEENCPDEFSO75","created_at":"2026-05-18T12:31:05Z"},{"alias_kind":"pith_short_8","alias_value":"ANJMEENC","created_at":"2026-05-18T12:31:05Z"}],"graph_snapshots":[{"event_id":"sha256:4d2b94a8e7535a1b50612260a7cfc52b86695f4e43c3aeace621b3b4a1b64999","target":"graph","created_at":"2026-05-18T00:17:24Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"In deep reinforcement learning (RL) tasks, an efficient exploration mechanism should be able to encourage an agent to take actions that lead to less frequent states which may yield higher accumulative future return. However, both knowing about the future and evaluating the frequentness of states are non-trivial tasks, especially for deep RL domains, where a state is represented by high-dimensional image frames. In this paper, we propose a novel informed exploration framework for deep RL, where we build the capability for an RL agent to predict over the future transitions and evaluate the frequ","authors_text":"Haiyan Yin, Jianda Chen, Sinno Jialin Pan","cross_cats":["cs.AI","stat.ML"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2017-07-03T13:07:40Z","title":"Hashing over Predicted Future Frames for Informed Exploration of Deep Reinforcement Learning"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1707.00524","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:f40a4fc318898509d6f8abbff2f833f3982379a6aea3b4db6185fd837c01e78e","target":"record","created_at":"2026-05-18T00:17:24Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"bd76b0e5152840aab9d2cd75c9b2012b4dda56956af1481ed49c010d30eab453","cross_cats_sorted":["cs.AI","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2017-07-03T13:07:40Z","title_canon_sha256":"8feabd5366200a3806e57b979590ccbe7341187fd1605db755633b9fdac7ba36"},"schema_version":"1.0","source":{"id":"1707.00524","kind":"arxiv","version":2}},"canonical_sha256":"0352c211a278c8593bfdc49f21c1ab5a73c60e3a050a1daaa68b453bcabd45fe","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"0352c211a278c8593bfdc49f21c1ab5a73c60e3a050a1daaa68b453bcabd45fe","first_computed_at":"2026-05-18T00:17:24.275058Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:17:24.275058Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"bHLCpFnSoOKDGqjdG+7bXaLoJp63jIA4qwH6Z3AsqrXZHUpFaPQQ82LcWF83iN74vR7MW0OpnSE4qGqGnxG9Cw==","signature_status":"signed_v1","signed_at":"2026-05-18T00:17:24.275778Z","signed_message":"canonical_sha256_bytes"},"source_id":"1707.00524","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:f40a4fc318898509d6f8abbff2f833f3982379a6aea3b4db6185fd837c01e78e","sha256:4d2b94a8e7535a1b50612260a7cfc52b86695f4e43c3aeace621b3b4a1b64999"],"state_sha256":"da583550c0625021af6df5a6c6d08560a951d55e1c4c20b6b30d752ea0a7ef0d"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"f5jSVWoo53BmLtssPAT+oI/EJx9z/gCNuDCUe8OWYn9F2EO308ojjyw0fBizCh/eggGCT/SxwuoMMYpZBbHGBg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-30T23:06:52.066069Z","bundle_sha256":"d9a6129ff81be1f3607501a7094d03df2b6e25d69e2773a70d411ffdadfcadad"}}