{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2019:PS3R6IE6YD3VDR5OKPEA766QKW","short_pith_number":"pith:PS3R6IE6","canonical_record":{"source":{"id":"1907.00868","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","primary_cat":"cs.LG","submitted_at":"2019-07-01T15:28:02Z","cross_cats_sorted":["cs.AI","stat.ML"],"title_canon_sha256":"2001375c3ba45fb7bad3b2201ecbca323625e3d0cc48b81a5651f50c06f05e2f","abstract_canon_sha256":"67c7b8750ec6f2a9f606196c6db3b7a64ff2ca05a7d730b426dc0f49e1da3243"},"schema_version":"1.0"},"canonical_sha256":"7cb71f209ec0f751c7ae53c80ffbd055bb68fda56f0b8cace7e7352dca5e4be9","source":{"kind":"arxiv","id":"1907.00868","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1907.00868","created_at":"2026-05-17T23:41:47Z"},{"alias_kind":"arxiv_version","alias_value":"1907.00868v1","created_at":"2026-05-17T23:41:47Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1907.00868","created_at":"2026-05-17T23:41:47Z"},{"alias_kind":"pith_short_12","alias_value":"PS3R6IE6YD3V","created_at":"2026-05-18T12:33:24Z"},{"alias_kind":"pith_short_16","alias_value":"PS3R6IE6YD3VDR5O","created_at":"2026-05-18T12:33:24Z"},{"alias_kind":"pith_short_8","alias_value":"PS3R6IE6","created_at":"2026-05-18T12:33:24Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2019:PS3R6IE6YD3VDR5OKPEA766QKW","target":"record","payload":{"canonical_record":{"source":{"id":"1907.00868","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","primary_cat":"cs.LG","submitted_at":"2019-07-01T15:28:02Z","cross_cats_sorted":["cs.AI","stat.ML"],"title_canon_sha256":"2001375c3ba45fb7bad3b2201ecbca323625e3d0cc48b81a5651f50c06f05e2f","abstract_canon_sha256":"67c7b8750ec6f2a9f606196c6db3b7a64ff2ca05a7d730b426dc0f49e1da3243"},"schema_version":"1.0"},"canonical_sha256":"7cb71f209ec0f751c7ae53c80ffbd055bb68fda56f0b8cace7e7352dca5e4be9","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:41:47.814619Z","signature_b64":"HP9FONyvJg+x+RtPpmdT7vpkr8TtI0GTZIageJRCYUvZrBe5QnkcAYmfpauI8iUjWOlhkz/VFF5Q8q6e5bMqDg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"7cb71f209ec0f751c7ae53c80ffbd055bb68fda56f0b8cace7e7352dca5e4be9","last_reissued_at":"2026-05-17T23:41:47.813911Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:41:47.813911Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1907.00868","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:41:47Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"7zMy0jyKKiGIHoeRYC4oE+QVm7Rh4Cw3YJkrKut/OB4pz/nCeIeYGfX021gll/zEHkCfDiSR8983561ek9NHDg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-31T23:35:01.530259Z"},"content_sha256":"eedc011e7665f528e958e68bf63cdd74ca33b388789e18a414bee3f9f574b963","schema_version":"1.0","event_id":"sha256:eedc011e7665f528e958e68bf63cdd74ca33b388789e18a414bee3f9f574b963"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2019:PS3R6IE6YD3VDR5OKPEA766QKW","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"MULEX: Disentangling Exploitation from Exploration in Deep RL","license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","headline":"","cross_cats":["cs.AI","stat.ML"],"primary_cat":"cs.LG","authors_text":"Damien Vincent, Lucas Beyer, Matthieu Geist, Olivier Pietquin, Olivier Teboul, Sylvain Gelly","submitted_at":"2019-07-01T15:28:02Z","abstract_excerpt":"An agent learning through interactions should balance its action selection process between probing the environment to discover new rewards and using the information acquired in the past to adopt useful behaviour. This trade-off is usually obtained by perturbing either the agent's actions (e.g., e-greedy or Gibbs sampling) or the agent's parameters (e.g., NoisyNet), or by modifying the reward it receives (e.g., exploration bonus, intrinsic motivation, or hand-shaped rewards). Here, we adopt a disruptive but simple and generic perspective, where we explicitly disentangle exploration and exploita"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1907.00868","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:41:47Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"gWCp0CMalqvyNIBV/pNTiECOOiY5gdcOj/iHv58riKml5qVNzfqyD4YX9NkqbKs6RY0ooGbl1eYZwwaADweSBg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-31T23:35:01.531027Z"},"content_sha256":"5ce463690d254339292a66ee4ae57ff1860342f7184481f734cae92f90f963b1","schema_version":"1.0","event_id":"sha256:5ce463690d254339292a66ee4ae57ff1860342f7184481f734cae92f90f963b1"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/PS3R6IE6YD3VDR5OKPEA766QKW/bundle.json","state_url":"https://pith.science/pith/PS3R6IE6YD3VDR5OKPEA766QKW/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/PS3R6IE6YD3VDR5OKPEA766QKW/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-31T23:35:01Z","links":{"resolver":"https://pith.science/pith/PS3R6IE6YD3VDR5OKPEA766QKW","bundle":"https://pith.science/pith/PS3R6IE6YD3VDR5OKPEA766QKW/bundle.json","state":"https://pith.science/pith/PS3R6IE6YD3VDR5OKPEA766QKW/state.json","well_known_bundle":"https://pith.science/.well-known/pith/PS3R6IE6YD3VDR5OKPEA766QKW/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2019:PS3R6IE6YD3VDR5OKPEA766QKW","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"67c7b8750ec6f2a9f606196c6db3b7a64ff2ca05a7d730b426dc0f49e1da3243","cross_cats_sorted":["cs.AI","stat.ML"],"license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","primary_cat":"cs.LG","submitted_at":"2019-07-01T15:28:02Z","title_canon_sha256":"2001375c3ba45fb7bad3b2201ecbca323625e3d0cc48b81a5651f50c06f05e2f"},"schema_version":"1.0","source":{"id":"1907.00868","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1907.00868","created_at":"2026-05-17T23:41:47Z"},{"alias_kind":"arxiv_version","alias_value":"1907.00868v1","created_at":"2026-05-17T23:41:47Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1907.00868","created_at":"2026-05-17T23:41:47Z"},{"alias_kind":"pith_short_12","alias_value":"PS3R6IE6YD3V","created_at":"2026-05-18T12:33:24Z"},{"alias_kind":"pith_short_16","alias_value":"PS3R6IE6YD3VDR5O","created_at":"2026-05-18T12:33:24Z"},{"alias_kind":"pith_short_8","alias_value":"PS3R6IE6","created_at":"2026-05-18T12:33:24Z"}],"graph_snapshots":[{"event_id":"sha256:5ce463690d254339292a66ee4ae57ff1860342f7184481f734cae92f90f963b1","target":"graph","created_at":"2026-05-17T23:41:47Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"An agent learning through interactions should balance its action selection process between probing the environment to discover new rewards and using the information acquired in the past to adopt useful behaviour. This trade-off is usually obtained by perturbing either the agent's actions (e.g., e-greedy or Gibbs sampling) or the agent's parameters (e.g., NoisyNet), or by modifying the reward it receives (e.g., exploration bonus, intrinsic motivation, or hand-shaped rewards). Here, we adopt a disruptive but simple and generic perspective, where we explicitly disentangle exploration and exploita","authors_text":"Damien Vincent, Lucas Beyer, Matthieu Geist, Olivier Pietquin, Olivier Teboul, Sylvain Gelly","cross_cats":["cs.AI","stat.ML"],"headline":"","license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","primary_cat":"cs.LG","submitted_at":"2019-07-01T15:28:02Z","title":"MULEX: Disentangling Exploitation from Exploration in Deep RL"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1907.00868","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:eedc011e7665f528e958e68bf63cdd74ca33b388789e18a414bee3f9f574b963","target":"record","created_at":"2026-05-17T23:41:47Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"67c7b8750ec6f2a9f606196c6db3b7a64ff2ca05a7d730b426dc0f49e1da3243","cross_cats_sorted":["cs.AI","stat.ML"],"license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","primary_cat":"cs.LG","submitted_at":"2019-07-01T15:28:02Z","title_canon_sha256":"2001375c3ba45fb7bad3b2201ecbca323625e3d0cc48b81a5651f50c06f05e2f"},"schema_version":"1.0","source":{"id":"1907.00868","kind":"arxiv","version":1}},"canonical_sha256":"7cb71f209ec0f751c7ae53c80ffbd055bb68fda56f0b8cace7e7352dca5e4be9","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"7cb71f209ec0f751c7ae53c80ffbd055bb68fda56f0b8cace7e7352dca5e4be9","first_computed_at":"2026-05-17T23:41:47.813911Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:41:47.813911Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"HP9FONyvJg+x+RtPpmdT7vpkr8TtI0GTZIageJRCYUvZrBe5QnkcAYmfpauI8iUjWOlhkz/VFF5Q8q6e5bMqDg==","signature_status":"signed_v1","signed_at":"2026-05-17T23:41:47.814619Z","signed_message":"canonical_sha256_bytes"},"source_id":"1907.00868","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:eedc011e7665f528e958e68bf63cdd74ca33b388789e18a414bee3f9f574b963","sha256:5ce463690d254339292a66ee4ae57ff1860342f7184481f734cae92f90f963b1"],"state_sha256":"4162cf006650579f4e5d1bc9663c5b0fb8668ae4dace9b02a74f9bf9b878e738"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"H/YolWLOuFrUTnn9tNhnRP13KNiqd8yeZMccG2tsu54a3ZuDLfE9CzPu1UoU+qP1DD3If/HG3TK0T0p+CoTtDQ==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-31T23:35:01.535101Z","bundle_sha256":"fb38eb5937990ff2c63f820ab73fd32becc440d6ac9a72d996efa3f3c60febf6"}}