{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2018:H6R74ATOAUP5BNZ5VZ2GZ3OMAX","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"2a77df604fd14242246b9b305322e74dbceeed3165a1b2256340ac5127775870","cross_cats_sorted":["cs.AI","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-06-04T19:47:24Z","title_canon_sha256":"9543983c94e53078b8528345fb797e9a57a17de8c4dd3137ac893efb0ca2e3f4"},"schema_version":"1.0","source":{"id":"1806.01347","kind":"arxiv","version":3}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1806.01347","created_at":"2026-05-17T23:46:36Z"},{"alias_kind":"arxiv_version","alias_value":"1806.01347v3","created_at":"2026-05-17T23:46:36Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1806.01347","created_at":"2026-05-17T23:46:36Z"},{"alias_kind":"pith_short_12","alias_value":"H6R74ATOAUP5","created_at":"2026-05-18T12:32:28Z"},{"alias_kind":"pith_short_16","alias_value":"H6R74ATOAUP5BNZ5","created_at":"2026-05-18T12:32:28Z"},{"alias_kind":"pith_short_8","alias_value":"H6R74ATO","created_at":"2026-05-18T12:32:28Z"}],"graph_snapshots":[{"event_id":"sha256:60f9ebedacc44becdd29bf067c17a78087599303f7342469f9476f1bc9442d55","target":"graph","created_at":"2026-05-17T23:46:36Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"We consider the problem of off-policy evaluation in Markov decision processes. Off-policy evaluation is the task of evaluating the expected return of one policy with data generated by a different, behavior policy. Importance sampling is a technique for off-policy evaluation that re-weights off-policy returns to account for differences in the likelihood of the returns between the two policies. In this paper, we study importance sampling with an estimated behavior policy where the behavior policy estimate comes from the same set of data used to compute the importance sampling estimate. We find t","authors_text":"Josiah P. Hanna, Peter Stone, Scott Niekum","cross_cats":["cs.AI","stat.ML"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-06-04T19:47:24Z","title":"Importance Sampling Policy Evaluation with an Estimated Behavior Policy"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1806.01347","kind":"arxiv","version":3},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:80fc0c7d955655b6349c4d21b9775305d62eea811f874d456b251cba432cee2e","target":"record","created_at":"2026-05-17T23:46:36Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"2a77df604fd14242246b9b305322e74dbceeed3165a1b2256340ac5127775870","cross_cats_sorted":["cs.AI","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-06-04T19:47:24Z","title_canon_sha256":"9543983c94e53078b8528345fb797e9a57a17de8c4dd3137ac893efb0ca2e3f4"},"schema_version":"1.0","source":{"id":"1806.01347","kind":"arxiv","version":3}},"canonical_sha256":"3fa3fe026e051fd0b73dae746cedcc05e323c84a5a44a5baac8b39b39e1ced65","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"3fa3fe026e051fd0b73dae746cedcc05e323c84a5a44a5baac8b39b39e1ced65","first_computed_at":"2026-05-17T23:46:36.272160Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:46:36.272160Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"64da+ked+H3jCjhz8CQuDUvL3rCMLIYlB/MS/oNyAygeW5khMEJpNreMUr0iQVNLDJaa5qV/mVUl5X7gikvlDw==","signature_status":"signed_v1","signed_at":"2026-05-17T23:46:36.272826Z","signed_message":"canonical_sha256_bytes"},"source_id":"1806.01347","source_kind":"arxiv","source_version":3}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:80fc0c7d955655b6349c4d21b9775305d62eea811f874d456b251cba432cee2e","sha256:60f9ebedacc44becdd29bf067c17a78087599303f7342469f9476f1bc9442d55"],"state_sha256":"813fb8bef7617f2123354a4040124d84ffee026a1123520efc34aadc88a26030"}