{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2018:GPLEA2RIASRI75YOBHQ7FK27Q7","short_pith_number":"pith:GPLEA2RI","canonical_record":{"source":{"id":"1811.06272","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-11-15T10:08:58Z","cross_cats_sorted":["stat.ML"],"title_canon_sha256":"d1dbca7481045412161e03ccdf4966341d6c673b4dcce6b894f99f3752e99ada","abstract_canon_sha256":"1255f28e582843d007311ce221681999cba5bc4cd165a9b3daa25bd2afd9da1f"},"schema_version":"1.0"},"canonical_sha256":"33d6406a2804a28ff70e09e1f2ab5f87f84985d68e3a92fe8aa55aa7030f5c26","source":{"kind":"arxiv","id":"1811.06272","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1811.06272","created_at":"2026-05-18T00:00:38Z"},{"alias_kind":"arxiv_version","alias_value":"1811.06272v1","created_at":"2026-05-18T00:00:38Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1811.06272","created_at":"2026-05-18T00:00:38Z"},{"alias_kind":"pith_short_12","alias_value":"GPLEA2RIASRI","created_at":"2026-05-18T12:32:25Z"},{"alias_kind":"pith_short_16","alias_value":"GPLEA2RIASRI75YO","created_at":"2026-05-18T12:32:25Z"},{"alias_kind":"pith_short_8","alias_value":"GPLEA2RI","created_at":"2026-05-18T12:32:25Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2018:GPLEA2RIASRI75YOBHQ7FK27Q7","target":"record","payload":{"canonical_record":{"source":{"id":"1811.06272","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-11-15T10:08:58Z","cross_cats_sorted":["stat.ML"],"title_canon_sha256":"d1dbca7481045412161e03ccdf4966341d6c673b4dcce6b894f99f3752e99ada","abstract_canon_sha256":"1255f28e582843d007311ce221681999cba5bc4cd165a9b3daa25bd2afd9da1f"},"schema_version":"1.0"},"canonical_sha256":"33d6406a2804a28ff70e09e1f2ab5f87f84985d68e3a92fe8aa55aa7030f5c26","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:00:38.153906Z","signature_b64":"fr7GnKcHadsDcbp+ZPirlpiqFCM4fV2ODQOl2E64r6LDJj4K8HvS/h8MtxfERlzui8o5Qbmh/6B6tW4V1rFcAw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"33d6406a2804a28ff70e09e1f2ab5f87f84985d68e3a92fe8aa55aa7030f5c26","last_reissued_at":"2026-05-18T00:00:38.153390Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:00:38.153390Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1811.06272","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:00:38Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"Qu0ztoFtKZZHh3RvS0sUSck6lOaKhGxQDkEv4xMZLCBC6+ELmg3O03oKqc1Gha9IP4afPuhxOBVEQeIbxvYpAA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-26T09:55:51.232909Z"},"content_sha256":"20861f4f49d49aac9195e3c5782d5b4966a863f0bb3105c611da6ca156d37390","schema_version":"1.0","event_id":"sha256:20861f4f49d49aac9195e3c5782d5b4966a863f0bb3105c611da6ca156d37390"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2018:GPLEA2RIASRI75YOBHQ7FK27Q7","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Woulda, Coulda, Shoulda: Counterfactually-Guided Policy Search","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["stat.ML"],"primary_cat":"cs.LG","authors_text":"Arthur Guez, Jean-Baptiste Lespiau, Lars Buesing, Nicolas Heess, Sebastien Racaniere, Theophane Weber, Yori Zwols","submitted_at":"2018-11-15T10:08:58Z","abstract_excerpt":"Learning policies on data synthesized by models can in principle quench the thirst of reinforcement learning algorithms for large amounts of real experience, which is often costly to acquire. However, simulating plausible experience de novo is a hard problem for many complex environments, often resulting in biases for model-based policy evaluation and search. Instead of de novo synthesis of data, here we assume logged, real experience and model alternative outcomes of this experience under counterfactual actions, actions that were not actually taken. Based on this, we propose the Counterfactua"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1811.06272","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:00:38Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"15+HATC+8vUVges83D0buVs7SG6TzPOpgmV4gHYKEf3N6c0HigZ1KFozjLHl8ut3W+Q4MTQDXAqSZ/QZdTJoDg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-26T09:55:51.233648Z"},"content_sha256":"e04138f3e92b826ecb3cadfabbdff5251f69f8d144beb24752c590a46e73829b","schema_version":"1.0","event_id":"sha256:e04138f3e92b826ecb3cadfabbdff5251f69f8d144beb24752c590a46e73829b"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/GPLEA2RIASRI75YOBHQ7FK27Q7/bundle.json","state_url":"https://pith.science/pith/GPLEA2RIASRI75YOBHQ7FK27Q7/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/GPLEA2RIASRI75YOBHQ7FK27Q7/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-26T09:55:51Z","links":{"resolver":"https://pith.science/pith/GPLEA2RIASRI75YOBHQ7FK27Q7","bundle":"https://pith.science/pith/GPLEA2RIASRI75YOBHQ7FK27Q7/bundle.json","state":"https://pith.science/pith/GPLEA2RIASRI75YOBHQ7FK27Q7/state.json","well_known_bundle":"https://pith.science/.well-known/pith/GPLEA2RIASRI75YOBHQ7FK27Q7/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2018:GPLEA2RIASRI75YOBHQ7FK27Q7","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"1255f28e582843d007311ce221681999cba5bc4cd165a9b3daa25bd2afd9da1f","cross_cats_sorted":["stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-11-15T10:08:58Z","title_canon_sha256":"d1dbca7481045412161e03ccdf4966341d6c673b4dcce6b894f99f3752e99ada"},"schema_version":"1.0","source":{"id":"1811.06272","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1811.06272","created_at":"2026-05-18T00:00:38Z"},{"alias_kind":"arxiv_version","alias_value":"1811.06272v1","created_at":"2026-05-18T00:00:38Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1811.06272","created_at":"2026-05-18T00:00:38Z"},{"alias_kind":"pith_short_12","alias_value":"GPLEA2RIASRI","created_at":"2026-05-18T12:32:25Z"},{"alias_kind":"pith_short_16","alias_value":"GPLEA2RIASRI75YO","created_at":"2026-05-18T12:32:25Z"},{"alias_kind":"pith_short_8","alias_value":"GPLEA2RI","created_at":"2026-05-18T12:32:25Z"}],"graph_snapshots":[{"event_id":"sha256:e04138f3e92b826ecb3cadfabbdff5251f69f8d144beb24752c590a46e73829b","target":"graph","created_at":"2026-05-18T00:00:38Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Learning policies on data synthesized by models can in principle quench the thirst of reinforcement learning algorithms for large amounts of real experience, which is often costly to acquire. However, simulating plausible experience de novo is a hard problem for many complex environments, often resulting in biases for model-based policy evaluation and search. Instead of de novo synthesis of data, here we assume logged, real experience and model alternative outcomes of this experience under counterfactual actions, actions that were not actually taken. Based on this, we propose the Counterfactua","authors_text":"Arthur Guez, Jean-Baptiste Lespiau, Lars Buesing, Nicolas Heess, Sebastien Racaniere, Theophane Weber, Yori Zwols","cross_cats":["stat.ML"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-11-15T10:08:58Z","title":"Woulda, Coulda, Shoulda: Counterfactually-Guided Policy Search"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1811.06272","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:20861f4f49d49aac9195e3c5782d5b4966a863f0bb3105c611da6ca156d37390","target":"record","created_at":"2026-05-18T00:00:38Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"1255f28e582843d007311ce221681999cba5bc4cd165a9b3daa25bd2afd9da1f","cross_cats_sorted":["stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-11-15T10:08:58Z","title_canon_sha256":"d1dbca7481045412161e03ccdf4966341d6c673b4dcce6b894f99f3752e99ada"},"schema_version":"1.0","source":{"id":"1811.06272","kind":"arxiv","version":1}},"canonical_sha256":"33d6406a2804a28ff70e09e1f2ab5f87f84985d68e3a92fe8aa55aa7030f5c26","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"33d6406a2804a28ff70e09e1f2ab5f87f84985d68e3a92fe8aa55aa7030f5c26","first_computed_at":"2026-05-18T00:00:38.153390Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:00:38.153390Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"fr7GnKcHadsDcbp+ZPirlpiqFCM4fV2ODQOl2E64r6LDJj4K8HvS/h8MtxfERlzui8o5Qbmh/6B6tW4V1rFcAw==","signature_status":"signed_v1","signed_at":"2026-05-18T00:00:38.153906Z","signed_message":"canonical_sha256_bytes"},"source_id":"1811.06272","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:20861f4f49d49aac9195e3c5782d5b4966a863f0bb3105c611da6ca156d37390","sha256:e04138f3e92b826ecb3cadfabbdff5251f69f8d144beb24752c590a46e73829b"],"state_sha256":"fe357427af6a156d5ade77cc6ce3a460b93c6cdf185616c7c2e2f2b5fc07e0df"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"mGPcPJbUy/VnplASiK/zsJKeBtkydsgaXATieJ/J8Jbef7OLvNnnBoUkwVCEvwuH2uOkhFYVHMqpVzT0ZAZPBQ==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-26T09:55:51.237490Z","bundle_sha256":"22ce3031444fd2ea2419f68c246d2091d76ec5ff05e19166c80c69b1043b5100"}}