{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2017:5A35SJ5TBELW2DCSMIJO7Y7PGX","short_pith_number":"pith:5A35SJ5T","canonical_record":{"source":{"id":"1703.01260","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2017-03-03T17:38:59Z","cross_cats_sorted":[],"title_canon_sha256":"536a13fc31c8a8a1108fc7f56038ef02c0769f9beca1ec530da243717107ddfb","abstract_canon_sha256":"1ca59b69863c807bb2c53936169fcc154e3dd58651ce995e51b74f116552f0bc"},"schema_version":"1.0"},"canonical_sha256":"e837d927b309176d0c526212efe3ef35c08363c6d1ccbced5adcff6b6ad2fc29","source":{"kind":"arxiv","id":"1703.01260","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1703.01260","created_at":"2026-05-18T00:43:35Z"},{"alias_kind":"arxiv_version","alias_value":"1703.01260v2","created_at":"2026-05-18T00:43:35Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1703.01260","created_at":"2026-05-18T00:43:35Z"},{"alias_kind":"pith_short_12","alias_value":"5A35SJ5TBELW","created_at":"2026-05-18T12:31:00Z"},{"alias_kind":"pith_short_16","alias_value":"5A35SJ5TBELW2DCS","created_at":"2026-05-18T12:31:00Z"},{"alias_kind":"pith_short_8","alias_value":"5A35SJ5T","created_at":"2026-05-18T12:31:00Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2017:5A35SJ5TBELW2DCSMIJO7Y7PGX","target":"record","payload":{"canonical_record":{"source":{"id":"1703.01260","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2017-03-03T17:38:59Z","cross_cats_sorted":[],"title_canon_sha256":"536a13fc31c8a8a1108fc7f56038ef02c0769f9beca1ec530da243717107ddfb","abstract_canon_sha256":"1ca59b69863c807bb2c53936169fcc154e3dd58651ce995e51b74f116552f0bc"},"schema_version":"1.0"},"canonical_sha256":"e837d927b309176d0c526212efe3ef35c08363c6d1ccbced5adcff6b6ad2fc29","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:43:35.169361Z","signature_b64":"rSDR2TyOCarNSDkixsGGEcvDs84xOw9u/JHnix1U3WQ0iszDW1G7PuoajPDmu6gCrv6ow0NiRpwJppYn5fvpDA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"e837d927b309176d0c526212efe3ef35c08363c6d1ccbced5adcff6b6ad2fc29","last_reissued_at":"2026-05-18T00:43:35.168923Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:43:35.168923Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1703.01260","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:43:35Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"7NEyyksPBf7Obw/SCJaChzwT5GgihmLeOHVdNVTdFnB58ZjilvG5200AkezILnjhqluGvbfrr5mCWoKvel8+CA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-05T01:48:37.742603Z"},"content_sha256":"96e2264ab240a701bd020210f21e62a3d1c502277d60cb5584c372cc8d9bd29b","schema_version":"1.0","event_id":"sha256:96e2264ab240a701bd020210f21e62a3d1c502277d60cb5584c372cc8d9bd29b"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2017:5A35SJ5TBELW2DCSMIJO7Y7PGX","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"EX2: Exploration with Exemplar Models for Deep Reinforcement Learning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.LG","authors_text":"John D. Co-Reyes, Justin Fu, Sergey Levine","submitted_at":"2017-03-03T17:38:59Z","abstract_excerpt":"Deep reinforcement learning algorithms have been shown to learn complex tasks using highly general policy classes. However, sparse reward problems remain a significant challenge. Exploration methods based on novelty detection have been particularly successful in such settings but typically require generative or predictive models of the observations, which can be difficult to train when the observations are very high-dimensional and complex, as in the case of raw images. We propose a novelty detection algorithm for exploration that is based entirely on discriminatively trained exemplar models, "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1703.01260","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:43:35Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"sTJB/cvu+QTv4/iN0ftb7ZsZe9l6g6PdWHZhVGMglrmLwpNOVEr3tuQa5hxBK+xFOa3WbaQZs48ACDDIBC82DA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-05T01:48:37.743283Z"},"content_sha256":"af0803ec739727d3c38c8a3c7a709ada06f56580a3ee610b74cdd25bb898e56f","schema_version":"1.0","event_id":"sha256:af0803ec739727d3c38c8a3c7a709ada06f56580a3ee610b74cdd25bb898e56f"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/5A35SJ5TBELW2DCSMIJO7Y7PGX/bundle.json","state_url":"https://pith.science/pith/5A35SJ5TBELW2DCSMIJO7Y7PGX/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/5A35SJ5TBELW2DCSMIJO7Y7PGX/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-05T01:48:37Z","links":{"resolver":"https://pith.science/pith/5A35SJ5TBELW2DCSMIJO7Y7PGX","bundle":"https://pith.science/pith/5A35SJ5TBELW2DCSMIJO7Y7PGX/bundle.json","state":"https://pith.science/pith/5A35SJ5TBELW2DCSMIJO7Y7PGX/state.json","well_known_bundle":"https://pith.science/.well-known/pith/5A35SJ5TBELW2DCSMIJO7Y7PGX/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2017:5A35SJ5TBELW2DCSMIJO7Y7PGX","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"1ca59b69863c807bb2c53936169fcc154e3dd58651ce995e51b74f116552f0bc","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2017-03-03T17:38:59Z","title_canon_sha256":"536a13fc31c8a8a1108fc7f56038ef02c0769f9beca1ec530da243717107ddfb"},"schema_version":"1.0","source":{"id":"1703.01260","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1703.01260","created_at":"2026-05-18T00:43:35Z"},{"alias_kind":"arxiv_version","alias_value":"1703.01260v2","created_at":"2026-05-18T00:43:35Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1703.01260","created_at":"2026-05-18T00:43:35Z"},{"alias_kind":"pith_short_12","alias_value":"5A35SJ5TBELW","created_at":"2026-05-18T12:31:00Z"},{"alias_kind":"pith_short_16","alias_value":"5A35SJ5TBELW2DCS","created_at":"2026-05-18T12:31:00Z"},{"alias_kind":"pith_short_8","alias_value":"5A35SJ5T","created_at":"2026-05-18T12:31:00Z"}],"graph_snapshots":[{"event_id":"sha256:af0803ec739727d3c38c8a3c7a709ada06f56580a3ee610b74cdd25bb898e56f","target":"graph","created_at":"2026-05-18T00:43:35Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Deep reinforcement learning algorithms have been shown to learn complex tasks using highly general policy classes. However, sparse reward problems remain a significant challenge. Exploration methods based on novelty detection have been particularly successful in such settings but typically require generative or predictive models of the observations, which can be difficult to train when the observations are very high-dimensional and complex, as in the case of raw images. We propose a novelty detection algorithm for exploration that is based entirely on discriminatively trained exemplar models, ","authors_text":"John D. Co-Reyes, Justin Fu, Sergey Levine","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2017-03-03T17:38:59Z","title":"EX2: Exploration with Exemplar Models for Deep Reinforcement Learning"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1703.01260","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:96e2264ab240a701bd020210f21e62a3d1c502277d60cb5584c372cc8d9bd29b","target":"record","created_at":"2026-05-18T00:43:35Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"1ca59b69863c807bb2c53936169fcc154e3dd58651ce995e51b74f116552f0bc","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2017-03-03T17:38:59Z","title_canon_sha256":"536a13fc31c8a8a1108fc7f56038ef02c0769f9beca1ec530da243717107ddfb"},"schema_version":"1.0","source":{"id":"1703.01260","kind":"arxiv","version":2}},"canonical_sha256":"e837d927b309176d0c526212efe3ef35c08363c6d1ccbced5adcff6b6ad2fc29","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"e837d927b309176d0c526212efe3ef35c08363c6d1ccbced5adcff6b6ad2fc29","first_computed_at":"2026-05-18T00:43:35.168923Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:43:35.168923Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"rSDR2TyOCarNSDkixsGGEcvDs84xOw9u/JHnix1U3WQ0iszDW1G7PuoajPDmu6gCrv6ow0NiRpwJppYn5fvpDA==","signature_status":"signed_v1","signed_at":"2026-05-18T00:43:35.169361Z","signed_message":"canonical_sha256_bytes"},"source_id":"1703.01260","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:96e2264ab240a701bd020210f21e62a3d1c502277d60cb5584c372cc8d9bd29b","sha256:af0803ec739727d3c38c8a3c7a709ada06f56580a3ee610b74cdd25bb898e56f"],"state_sha256":"b56a79fbc576d1e600d3bb05825219c0a6b3e1a359eb8b8c7619890a0f3b440a"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"vPZBxDgauiXu7pq7NY3hujtr+sBClI7MLpLHCsgJV1+QCU6a+7+JrWlngkbkXYPU3eaQ1bC+bkhPyt4XmnfkBw==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-05T01:48:37.747406Z","bundle_sha256":"21452986ae27784cd2c32157c4d8b7dd1f3eb4e670c652c5fb37dd18cb691210"}}