{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2016:L2OQAIZZUQRNBXMX744PUWSDQ7","short_pith_number":"pith:L2OQAIZZ","canonical_record":{"source":{"id":"1605.09674","kind":"arxiv","version":4},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2016-05-31T15:34:36Z","cross_cats_sorted":["cs.AI","cs.RO","stat.ML"],"title_canon_sha256":"95b4c6c31c986db664639f299ffbd77ed027e506513159489bd75ab211423ecb","abstract_canon_sha256":"8f07c7bcee528f9b3c202cb92a695b9fd2ea2b9654a010031d378eab58a89385"},"schema_version":"1.0"},"canonical_sha256":"5e9d002339a422d0dd97ff38fa5a4387c015bf5db73a7ff416e305c0e0e8a68b","source":{"kind":"arxiv","id":"1605.09674","version":4},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1605.09674","created_at":"2026-05-18T00:52:02Z"},{"alias_kind":"arxiv_version","alias_value":"1605.09674v4","created_at":"2026-05-18T00:52:02Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1605.09674","created_at":"2026-05-18T00:52:02Z"},{"alias_kind":"pith_short_12","alias_value":"L2OQAIZZUQRN","created_at":"2026-05-18T12:30:29Z"},{"alias_kind":"pith_short_16","alias_value":"L2OQAIZZUQRNBXMX","created_at":"2026-05-18T12:30:29Z"},{"alias_kind":"pith_short_8","alias_value":"L2OQAIZZ","created_at":"2026-05-18T12:30:29Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2016:L2OQAIZZUQRNBXMX744PUWSDQ7","target":"record","payload":{"canonical_record":{"source":{"id":"1605.09674","kind":"arxiv","version":4},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2016-05-31T15:34:36Z","cross_cats_sorted":["cs.AI","cs.RO","stat.ML"],"title_canon_sha256":"95b4c6c31c986db664639f299ffbd77ed027e506513159489bd75ab211423ecb","abstract_canon_sha256":"8f07c7bcee528f9b3c202cb92a695b9fd2ea2b9654a010031d378eab58a89385"},"schema_version":"1.0"},"canonical_sha256":"5e9d002339a422d0dd97ff38fa5a4387c015bf5db73a7ff416e305c0e0e8a68b","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:52:02.058368Z","signature_b64":"kfgd0ZeUKEVms5zN2oV8X6WYm79Jli3jiolP2NBSXc/IkfOTTaVNqO/mFn4O5et5oZvHV8z7skDUi/FpEDGfDA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"5e9d002339a422d0dd97ff38fa5a4387c015bf5db73a7ff416e305c0e0e8a68b","last_reissued_at":"2026-05-18T00:52:02.057811Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:52:02.057811Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1605.09674","source_version":4,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:52:02Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"q+qeUAcZrxbJqO1OAXKkvWT88Xno2r2OpKIOnt/DlzIGXyD/JzRyZxu2AozqmXnq5X7DCbi/LH76r75/PJabBw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-26T05:39:25.841397Z"},"content_sha256":"783396875b854c47593212dc8bb060811192dca196d1b2f3c143d069f4c9fca5","schema_version":"1.0","event_id":"sha256:783396875b854c47593212dc8bb060811192dca196d1b2f3c143d069f4c9fca5"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2016:L2OQAIZZUQRNBXMX744PUWSDQ7","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"VIME: Variational Information Maximizing Exploration","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","cs.RO","stat.ML"],"primary_cat":"cs.LG","authors_text":"Filip De Turck, John Schulman, Pieter Abbeel, Rein Houthooft, Xi Chen, Yan Duan","submitted_at":"2016-05-31T15:34:36Z","abstract_excerpt":"Scalable and effective exploration remains a key challenge in reinforcement learning (RL). While there are methods with optimality guarantees in the setting of discrete state and action spaces, these methods cannot be applied in high-dimensional deep RL scenarios. As such, most contemporary RL relies on simple heuristics such as epsilon-greedy exploration or adding Gaussian noise to the controls. This paper introduces Variational Information Maximizing Exploration (VIME), an exploration strategy based on maximization of information gain about the agent's belief of environment dynamics. We prop"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1605.09674","kind":"arxiv","version":4},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:52:02Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"1PM1mfEo1eDI4A5Coic7UX7FYfA09z6ueGFFaGw2FL1pwGL4J9uYpJy1FGHQil5CWc1xWQ5nCizxRgOjGTg8Bg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-26T05:39:25.841967Z"},"content_sha256":"58be916e9ea92e71260c04b4641e991c1ffacfbf9cc9784fa6a7a7688df4708a","schema_version":"1.0","event_id":"sha256:58be916e9ea92e71260c04b4641e991c1ffacfbf9cc9784fa6a7a7688df4708a"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/L2OQAIZZUQRNBXMX744PUWSDQ7/bundle.json","state_url":"https://pith.science/pith/L2OQAIZZUQRNBXMX744PUWSDQ7/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/L2OQAIZZUQRNBXMX744PUWSDQ7/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-26T05:39:25Z","links":{"resolver":"https://pith.science/pith/L2OQAIZZUQRNBXMX744PUWSDQ7","bundle":"https://pith.science/pith/L2OQAIZZUQRNBXMX744PUWSDQ7/bundle.json","state":"https://pith.science/pith/L2OQAIZZUQRNBXMX744PUWSDQ7/state.json","well_known_bundle":"https://pith.science/.well-known/pith/L2OQAIZZUQRNBXMX744PUWSDQ7/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2016:L2OQAIZZUQRNBXMX744PUWSDQ7","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"8f07c7bcee528f9b3c202cb92a695b9fd2ea2b9654a010031d378eab58a89385","cross_cats_sorted":["cs.AI","cs.RO","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2016-05-31T15:34:36Z","title_canon_sha256":"95b4c6c31c986db664639f299ffbd77ed027e506513159489bd75ab211423ecb"},"schema_version":"1.0","source":{"id":"1605.09674","kind":"arxiv","version":4}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1605.09674","created_at":"2026-05-18T00:52:02Z"},{"alias_kind":"arxiv_version","alias_value":"1605.09674v4","created_at":"2026-05-18T00:52:02Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1605.09674","created_at":"2026-05-18T00:52:02Z"},{"alias_kind":"pith_short_12","alias_value":"L2OQAIZZUQRN","created_at":"2026-05-18T12:30:29Z"},{"alias_kind":"pith_short_16","alias_value":"L2OQAIZZUQRNBXMX","created_at":"2026-05-18T12:30:29Z"},{"alias_kind":"pith_short_8","alias_value":"L2OQAIZZ","created_at":"2026-05-18T12:30:29Z"}],"graph_snapshots":[{"event_id":"sha256:58be916e9ea92e71260c04b4641e991c1ffacfbf9cc9784fa6a7a7688df4708a","target":"graph","created_at":"2026-05-18T00:52:02Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Scalable and effective exploration remains a key challenge in reinforcement learning (RL). While there are methods with optimality guarantees in the setting of discrete state and action spaces, these methods cannot be applied in high-dimensional deep RL scenarios. As such, most contemporary RL relies on simple heuristics such as epsilon-greedy exploration or adding Gaussian noise to the controls. This paper introduces Variational Information Maximizing Exploration (VIME), an exploration strategy based on maximization of information gain about the agent's belief of environment dynamics. We prop","authors_text":"Filip De Turck, John Schulman, Pieter Abbeel, Rein Houthooft, Xi Chen, Yan Duan","cross_cats":["cs.AI","cs.RO","stat.ML"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2016-05-31T15:34:36Z","title":"VIME: Variational Information Maximizing Exploration"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1605.09674","kind":"arxiv","version":4},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:783396875b854c47593212dc8bb060811192dca196d1b2f3c143d069f4c9fca5","target":"record","created_at":"2026-05-18T00:52:02Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"8f07c7bcee528f9b3c202cb92a695b9fd2ea2b9654a010031d378eab58a89385","cross_cats_sorted":["cs.AI","cs.RO","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2016-05-31T15:34:36Z","title_canon_sha256":"95b4c6c31c986db664639f299ffbd77ed027e506513159489bd75ab211423ecb"},"schema_version":"1.0","source":{"id":"1605.09674","kind":"arxiv","version":4}},"canonical_sha256":"5e9d002339a422d0dd97ff38fa5a4387c015bf5db73a7ff416e305c0e0e8a68b","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"5e9d002339a422d0dd97ff38fa5a4387c015bf5db73a7ff416e305c0e0e8a68b","first_computed_at":"2026-05-18T00:52:02.057811Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:52:02.057811Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"kfgd0ZeUKEVms5zN2oV8X6WYm79Jli3jiolP2NBSXc/IkfOTTaVNqO/mFn4O5et5oZvHV8z7skDUi/FpEDGfDA==","signature_status":"signed_v1","signed_at":"2026-05-18T00:52:02.058368Z","signed_message":"canonical_sha256_bytes"},"source_id":"1605.09674","source_kind":"arxiv","source_version":4}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:783396875b854c47593212dc8bb060811192dca196d1b2f3c143d069f4c9fca5","sha256:58be916e9ea92e71260c04b4641e991c1ffacfbf9cc9784fa6a7a7688df4708a"],"state_sha256":"1e0905420a7e56b64e96aa86b04ba298bd56b2e74013569145ceff0413ddd229"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"mfVQxMGl1tvn1awIVgGOGB4z2pU7amcvH469ROmZ+cM/NJirs3vn3UMXcqB1SEllR13QMhCaXUTvLSyEvqptAA==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-26T05:39:25.844697Z","bundle_sha256":"ca77a5fe4e87b86d39d8493508f8e4114f6a8630ca806b7256f73a704f1e8003"}}