{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2016:Z36DKIW5RCV4IDJB67GHMF5K6C","short_pith_number":"pith:Z36DKIW5","canonical_record":{"source":{"id":"1607.03842","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"stat.ML","submitted_at":"2016-07-13T17:55:45Z","cross_cats_sorted":[],"title_canon_sha256":"79cf28bc4ee7d00bb2093405bac46a3bea83b2050c6a5f8d633406358e9fd94f","abstract_canon_sha256":"a2754d1787251a8ec18bccdea147f5c6c13851763a06a20ce62193c8dd2b0990"},"schema_version":"1.0"},"canonical_sha256":"cefc3522dd88abc40d21f7cc7617aaf0bfc79d27f1cea3c2f1716874d0f550b8","source":{"kind":"arxiv","id":"1607.03842","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1607.03842","created_at":"2026-05-18T01:11:05Z"},{"alias_kind":"arxiv_version","alias_value":"1607.03842v1","created_at":"2026-05-18T01:11:05Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1607.03842","created_at":"2026-05-18T01:11:05Z"},{"alias_kind":"pith_short_12","alias_value":"Z36DKIW5RCV4","created_at":"2026-05-18T12:30:53Z"},{"alias_kind":"pith_short_16","alias_value":"Z36DKIW5RCV4IDJB","created_at":"2026-05-18T12:30:53Z"},{"alias_kind":"pith_short_8","alias_value":"Z36DKIW5","created_at":"2026-05-18T12:30:53Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2016:Z36DKIW5RCV4IDJB67GHMF5K6C","target":"record","payload":{"canonical_record":{"source":{"id":"1607.03842","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"stat.ML","submitted_at":"2016-07-13T17:55:45Z","cross_cats_sorted":[],"title_canon_sha256":"79cf28bc4ee7d00bb2093405bac46a3bea83b2050c6a5f8d633406358e9fd94f","abstract_canon_sha256":"a2754d1787251a8ec18bccdea147f5c6c13851763a06a20ce62193c8dd2b0990"},"schema_version":"1.0"},"canonical_sha256":"cefc3522dd88abc40d21f7cc7617aaf0bfc79d27f1cea3c2f1716874d0f550b8","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T01:11:05.560615Z","signature_b64":"RG6h3X95+hXuXeZ7DghAufGao7a+t0I0H/4GZZP7rwwGe++L6uL19+aDZozzkOd0yH1NMPt7GwFIfPlPH4pFAQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"cefc3522dd88abc40d21f7cc7617aaf0bfc79d27f1cea3c2f1716874d0f550b8","last_reissued_at":"2026-05-18T01:11:05.560022Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T01:11:05.560022Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1607.03842","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T01:11:05Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"u1zdk6uN4/aZ+bz2cFFk0d7/mDYIesotHBfL5han1nA9zpBXjat/kcM11NsgOA7HcQ0sTuG4KlvGSANcRZpDCw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-04T02:44:37.701707Z"},"content_sha256":"8b42e2b8214594897ab4fc8dac0c46f2a3fbb76ca21a28229ca1c25eb447008f","schema_version":"1.0","event_id":"sha256:8b42e2b8214594897ab4fc8dac0c46f2a3fbb76ca21a28229ca1c25eb447008f"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2016:Z36DKIW5RCV4IDJB67GHMF5K6C","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Safe Policy Improvement by Minimizing Robust Baseline Regret","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"stat.ML","authors_text":"Marek Petrik, Mohammad Ghavamzadeh, Yinlam Chow","submitted_at":"2016-07-13T17:55:45Z","abstract_excerpt":"An important problem in sequential decision-making under uncertainty is to use limited data to compute a safe policy, i.e., a policy that is guaranteed to perform at least as well as a given baseline strategy. In this paper, we develop and analyze a new model-based approach to compute a safe policy when we have access to an inaccurate dynamics model of the system with known accuracy guarantees. Our proposed robust method uses this (inaccurate) model to directly minimize the (negative) regret w.r.t. the baseline policy. Contrary to the existing approaches, minimizing the regret allows one to im"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1607.03842","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T01:11:05Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"yhFmNBJL79Dpdk8f9rcbVl/7pGBSVg0wMqp1YZcEbjYrNcv23uwNbGWTcUiSPRebswNNs5pIDKZ8VlVgvmdJDw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-04T02:44:37.702102Z"},"content_sha256":"125d1d54e12e2d29d8617b813a25b3405559ea8305aae0b4734f34b3a762bcae","schema_version":"1.0","event_id":"sha256:125d1d54e12e2d29d8617b813a25b3405559ea8305aae0b4734f34b3a762bcae"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/Z36DKIW5RCV4IDJB67GHMF5K6C/bundle.json","state_url":"https://pith.science/pith/Z36DKIW5RCV4IDJB67GHMF5K6C/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/Z36DKIW5RCV4IDJB67GHMF5K6C/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-04T02:44:37Z","links":{"resolver":"https://pith.science/pith/Z36DKIW5RCV4IDJB67GHMF5K6C","bundle":"https://pith.science/pith/Z36DKIW5RCV4IDJB67GHMF5K6C/bundle.json","state":"https://pith.science/pith/Z36DKIW5RCV4IDJB67GHMF5K6C/state.json","well_known_bundle":"https://pith.science/.well-known/pith/Z36DKIW5RCV4IDJB67GHMF5K6C/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2016:Z36DKIW5RCV4IDJB67GHMF5K6C","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"a2754d1787251a8ec18bccdea147f5c6c13851763a06a20ce62193c8dd2b0990","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"stat.ML","submitted_at":"2016-07-13T17:55:45Z","title_canon_sha256":"79cf28bc4ee7d00bb2093405bac46a3bea83b2050c6a5f8d633406358e9fd94f"},"schema_version":"1.0","source":{"id":"1607.03842","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1607.03842","created_at":"2026-05-18T01:11:05Z"},{"alias_kind":"arxiv_version","alias_value":"1607.03842v1","created_at":"2026-05-18T01:11:05Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1607.03842","created_at":"2026-05-18T01:11:05Z"},{"alias_kind":"pith_short_12","alias_value":"Z36DKIW5RCV4","created_at":"2026-05-18T12:30:53Z"},{"alias_kind":"pith_short_16","alias_value":"Z36DKIW5RCV4IDJB","created_at":"2026-05-18T12:30:53Z"},{"alias_kind":"pith_short_8","alias_value":"Z36DKIW5","created_at":"2026-05-18T12:30:53Z"}],"graph_snapshots":[{"event_id":"sha256:125d1d54e12e2d29d8617b813a25b3405559ea8305aae0b4734f34b3a762bcae","target":"graph","created_at":"2026-05-18T01:11:05Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"An important problem in sequential decision-making under uncertainty is to use limited data to compute a safe policy, i.e., a policy that is guaranteed to perform at least as well as a given baseline strategy. In this paper, we develop and analyze a new model-based approach to compute a safe policy when we have access to an inaccurate dynamics model of the system with known accuracy guarantees. Our proposed robust method uses this (inaccurate) model to directly minimize the (negative) regret w.r.t. the baseline policy. Contrary to the existing approaches, minimizing the regret allows one to im","authors_text":"Marek Petrik, Mohammad Ghavamzadeh, Yinlam Chow","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"stat.ML","submitted_at":"2016-07-13T17:55:45Z","title":"Safe Policy Improvement by Minimizing Robust Baseline Regret"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1607.03842","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:8b42e2b8214594897ab4fc8dac0c46f2a3fbb76ca21a28229ca1c25eb447008f","target":"record","created_at":"2026-05-18T01:11:05Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"a2754d1787251a8ec18bccdea147f5c6c13851763a06a20ce62193c8dd2b0990","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"stat.ML","submitted_at":"2016-07-13T17:55:45Z","title_canon_sha256":"79cf28bc4ee7d00bb2093405bac46a3bea83b2050c6a5f8d633406358e9fd94f"},"schema_version":"1.0","source":{"id":"1607.03842","kind":"arxiv","version":1}},"canonical_sha256":"cefc3522dd88abc40d21f7cc7617aaf0bfc79d27f1cea3c2f1716874d0f550b8","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"cefc3522dd88abc40d21f7cc7617aaf0bfc79d27f1cea3c2f1716874d0f550b8","first_computed_at":"2026-05-18T01:11:05.560022Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T01:11:05.560022Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"RG6h3X95+hXuXeZ7DghAufGao7a+t0I0H/4GZZP7rwwGe++L6uL19+aDZozzkOd0yH1NMPt7GwFIfPlPH4pFAQ==","signature_status":"signed_v1","signed_at":"2026-05-18T01:11:05.560615Z","signed_message":"canonical_sha256_bytes"},"source_id":"1607.03842","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:8b42e2b8214594897ab4fc8dac0c46f2a3fbb76ca21a28229ca1c25eb447008f","sha256:125d1d54e12e2d29d8617b813a25b3405559ea8305aae0b4734f34b3a762bcae"],"state_sha256":"087b90778929899ea9405c1f4dec9756c887749314d856fd8201152d6dd03efb"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"lu6PvzL2lpUw0SSii0hnmIIkapdXrrDsvJpdY37iNovn2Tr7XWecVVNF8o7/SuWtsjJEH38WeDuYVMXXjiCvDg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-04T02:44:37.704166Z","bundle_sha256":"e54b451657637e9e0153857c0dd64a34bbd17a9fb2d11984dd58c77572fe4ae4"}}