{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2019:KB4ZHPHQDQ7CGUFCT7PJKMOBOX","short_pith_number":"pith:KB4ZHPHQ","canonical_record":{"source":{"id":"1906.03735","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-06-09T23:15:49Z","cross_cats_sorted":["stat.ML"],"title_canon_sha256":"35414b9c02d5ef2d5bb109cd7f08d4594a61ca2e6210edcf989d6506da4dc979","abstract_canon_sha256":"006b28d59a564e7312b745c1bbafa3f51bd02b50e52eb5b136fea694c575de58"},"schema_version":"1.0"},"canonical_sha256":"507993bcf01c3e2350a29fde9531c175ec1b1e57a71a0ac0073837366851a361","source":{"kind":"arxiv","id":"1906.03735","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1906.03735","created_at":"2026-05-17T23:43:45Z"},{"alias_kind":"arxiv_version","alias_value":"1906.03735v1","created_at":"2026-05-17T23:43:45Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1906.03735","created_at":"2026-05-17T23:43:45Z"},{"alias_kind":"pith_short_12","alias_value":"KB4ZHPHQDQ7C","created_at":"2026-05-18T12:33:21Z"},{"alias_kind":"pith_short_16","alias_value":"KB4ZHPHQDQ7CGUFC","created_at":"2026-05-18T12:33:21Z"},{"alias_kind":"pith_short_8","alias_value":"KB4ZHPHQ","created_at":"2026-05-18T12:33:21Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2019:KB4ZHPHQDQ7CGUFCT7PJKMOBOX","target":"record","payload":{"canonical_record":{"source":{"id":"1906.03735","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-06-09T23:15:49Z","cross_cats_sorted":["stat.ML"],"title_canon_sha256":"35414b9c02d5ef2d5bb109cd7f08d4594a61ca2e6210edcf989d6506da4dc979","abstract_canon_sha256":"006b28d59a564e7312b745c1bbafa3f51bd02b50e52eb5b136fea694c575de58"},"schema_version":"1.0"},"canonical_sha256":"507993bcf01c3e2350a29fde9531c175ec1b1e57a71a0ac0073837366851a361","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:43:45.118392Z","signature_b64":"PGlUou2wiIXvUZXpXpqcZ8wkCREhx0Y29nLMzC2/j2ANmfhEm2Dq8DCP9UF2BWfUYHjNUHA3Hg1/5YzpRoU+DA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"507993bcf01c3e2350a29fde9531c175ec1b1e57a71a0ac0073837366851a361","last_reissued_at":"2026-05-17T23:43:45.117788Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:43:45.117788Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1906.03735","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:43:45Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"fqE1MwvJvqQRP73r8ROySZegr1IwpHjaTYYj/psnnGqxwcXf52G2Tlo06sxGKdOrpuOZ5TxTi+nk/jtIfNJUDA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-02T15:19:42.757520Z"},"content_sha256":"72b1802e5cca20e35a220177832819ece1878f178ba9e3837a5a210f677bbc79","schema_version":"1.0","event_id":"sha256:72b1802e5cca20e35a220177832819ece1878f178ba9e3837a5a210f677bbc79"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2019:KB4ZHPHQDQ7CGUFCT7PJKMOBOX","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Intrinsically Efficient, Stable, and Bounded Off-Policy Evaluation for Reinforcement Learning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["stat.ML"],"primary_cat":"cs.LG","authors_text":"Masatoshi Uehara, Nathan Kallus","submitted_at":"2019-06-09T23:15:49Z","abstract_excerpt":"Off-policy evaluation (OPE) in both contextual bandits and reinforcement learning allows one to evaluate novel decision policies without needing to conduct exploration, which is often costly or otherwise infeasible. The problem's importance has attracted many proposed solutions, including importance sampling (IS), self-normalized IS (SNIS), and doubly robust (DR) estimates. DR and its variants ensure semiparametric local efficiency if Q-functions are well-specified, but if they are not they can be worse than both IS and SNIS. It also does not enjoy SNIS's inherent stability and boundedness. We"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1906.03735","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:43:45Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"RxKpxxYtuU7Pl41H/iITfPwO5JnQ/Z+Xe/CSxnsZ+jmNBQbZ5HSWL5JV5yPizoShKlmRzkYrO93dBxfhSGxlAg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-02T15:19:42.757858Z"},"content_sha256":"61ec5740dbd4faad4dd3b6446fe9ec7d543697680348d4da7f3c32fd9891fcee","schema_version":"1.0","event_id":"sha256:61ec5740dbd4faad4dd3b6446fe9ec7d543697680348d4da7f3c32fd9891fcee"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/KB4ZHPHQDQ7CGUFCT7PJKMOBOX/bundle.json","state_url":"https://pith.science/pith/KB4ZHPHQDQ7CGUFCT7PJKMOBOX/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/KB4ZHPHQDQ7CGUFCT7PJKMOBOX/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-02T15:19:42Z","links":{"resolver":"https://pith.science/pith/KB4ZHPHQDQ7CGUFCT7PJKMOBOX","bundle":"https://pith.science/pith/KB4ZHPHQDQ7CGUFCT7PJKMOBOX/bundle.json","state":"https://pith.science/pith/KB4ZHPHQDQ7CGUFCT7PJKMOBOX/state.json","well_known_bundle":"https://pith.science/.well-known/pith/KB4ZHPHQDQ7CGUFCT7PJKMOBOX/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2019:KB4ZHPHQDQ7CGUFCT7PJKMOBOX","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"006b28d59a564e7312b745c1bbafa3f51bd02b50e52eb5b136fea694c575de58","cross_cats_sorted":["stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-06-09T23:15:49Z","title_canon_sha256":"35414b9c02d5ef2d5bb109cd7f08d4594a61ca2e6210edcf989d6506da4dc979"},"schema_version":"1.0","source":{"id":"1906.03735","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1906.03735","created_at":"2026-05-17T23:43:45Z"},{"alias_kind":"arxiv_version","alias_value":"1906.03735v1","created_at":"2026-05-17T23:43:45Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1906.03735","created_at":"2026-05-17T23:43:45Z"},{"alias_kind":"pith_short_12","alias_value":"KB4ZHPHQDQ7C","created_at":"2026-05-18T12:33:21Z"},{"alias_kind":"pith_short_16","alias_value":"KB4ZHPHQDQ7CGUFC","created_at":"2026-05-18T12:33:21Z"},{"alias_kind":"pith_short_8","alias_value":"KB4ZHPHQ","created_at":"2026-05-18T12:33:21Z"}],"graph_snapshots":[{"event_id":"sha256:61ec5740dbd4faad4dd3b6446fe9ec7d543697680348d4da7f3c32fd9891fcee","target":"graph","created_at":"2026-05-17T23:43:45Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Off-policy evaluation (OPE) in both contextual bandits and reinforcement learning allows one to evaluate novel decision policies without needing to conduct exploration, which is often costly or otherwise infeasible. The problem's importance has attracted many proposed solutions, including importance sampling (IS), self-normalized IS (SNIS), and doubly robust (DR) estimates. DR and its variants ensure semiparametric local efficiency if Q-functions are well-specified, but if they are not they can be worse than both IS and SNIS. It also does not enjoy SNIS's inherent stability and boundedness. We","authors_text":"Masatoshi Uehara, Nathan Kallus","cross_cats":["stat.ML"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-06-09T23:15:49Z","title":"Intrinsically Efficient, Stable, and Bounded Off-Policy Evaluation for Reinforcement Learning"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1906.03735","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:72b1802e5cca20e35a220177832819ece1878f178ba9e3837a5a210f677bbc79","target":"record","created_at":"2026-05-17T23:43:45Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"006b28d59a564e7312b745c1bbafa3f51bd02b50e52eb5b136fea694c575de58","cross_cats_sorted":["stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-06-09T23:15:49Z","title_canon_sha256":"35414b9c02d5ef2d5bb109cd7f08d4594a61ca2e6210edcf989d6506da4dc979"},"schema_version":"1.0","source":{"id":"1906.03735","kind":"arxiv","version":1}},"canonical_sha256":"507993bcf01c3e2350a29fde9531c175ec1b1e57a71a0ac0073837366851a361","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"507993bcf01c3e2350a29fde9531c175ec1b1e57a71a0ac0073837366851a361","first_computed_at":"2026-05-17T23:43:45.117788Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:43:45.117788Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"PGlUou2wiIXvUZXpXpqcZ8wkCREhx0Y29nLMzC2/j2ANmfhEm2Dq8DCP9UF2BWfUYHjNUHA3Hg1/5YzpRoU+DA==","signature_status":"signed_v1","signed_at":"2026-05-17T23:43:45.118392Z","signed_message":"canonical_sha256_bytes"},"source_id":"1906.03735","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:72b1802e5cca20e35a220177832819ece1878f178ba9e3837a5a210f677bbc79","sha256:61ec5740dbd4faad4dd3b6446fe9ec7d543697680348d4da7f3c32fd9891fcee"],"state_sha256":"8396ca7ffce2d76db8f71a662ac0074ab229dff5a341cb560e6d031515dfb115"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"+bPU3ClOvTaVHl789ZDp5RqJ/+GmSTRtQtgWT7NWY/JpgjOFQJz0clRulpG66Lv3FAXKhlmWDgxL+1W/7JbfCQ==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-02T15:19:42.759810Z","bundle_sha256":"dd3f7a75e7325a7d2d234bea1e8ad91b8df3797404e90900551923587a04fa36"}}