{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2018:2J4EXT6CDOENBOTMAZD6WN7O7F","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"715d80270f5d0de988c99dea143bf9116bffa30e8e05df3c4523e45c67899ed1","cross_cats_sorted":["cs.AI","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-11-07T18:16:28Z","title_canon_sha256":"4a98782f7016140a59ed91419efaa77493a41678d1175075f10604550797674c"},"schema_version":"1.0","source":{"id":"1811.03056","kind":"arxiv","version":3}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1811.03056","created_at":"2026-05-17T23:44:58Z"},{"alias_kind":"arxiv_version","alias_value":"1811.03056v3","created_at":"2026-05-17T23:44:58Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1811.03056","created_at":"2026-05-17T23:44:58Z"},{"alias_kind":"pith_short_12","alias_value":"2J4EXT6CDOEN","created_at":"2026-05-18T12:32:02Z"},{"alias_kind":"pith_short_16","alias_value":"2J4EXT6CDOENBOTM","created_at":"2026-05-18T12:32:02Z"},{"alias_kind":"pith_short_8","alias_value":"2J4EXT6C","created_at":"2026-05-18T12:32:02Z"}],"graph_snapshots":[{"event_id":"sha256:cd973daf564e485b15710e37b5e30853e036efea6d90798dbb19ff1c34126b41","target":"graph","created_at":"2026-05-17T23:44:58Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"The performance of a reinforcement learning algorithm can vary drastically during learning because of exploration. Existing algorithms provide little information about the quality of their current policy before executing it, and thus have limited use in high-stakes applications like healthcare. We address this lack of accountability by proposing that algorithms output policy certificates. These certificates bound the sub-optimality and return of the policy in the next episode, allowing humans to intervene when the certified quality is not satisfactory. We further introduce two new algorithms w","authors_text":"Christoph Dann, Emma Brunskill, Lihong Li, Wei Wei","cross_cats":["cs.AI","stat.ML"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-11-07T18:16:28Z","title":"Policy Certificates: Towards Accountable Reinforcement Learning"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1811.03056","kind":"arxiv","version":3},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:87d693d67a6e133496a2c124364634d964f2f19f721e0851a8a47e95ef891b06","target":"record","created_at":"2026-05-17T23:44:58Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"715d80270f5d0de988c99dea143bf9116bffa30e8e05df3c4523e45c67899ed1","cross_cats_sorted":["cs.AI","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-11-07T18:16:28Z","title_canon_sha256":"4a98782f7016140a59ed91419efaa77493a41678d1175075f10604550797674c"},"schema_version":"1.0","source":{"id":"1811.03056","kind":"arxiv","version":3}},"canonical_sha256":"d2784bcfc21b88d0ba6c0647eb37eef944fd96c4f517c9f95fc2c51bb5507ef7","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"d2784bcfc21b88d0ba6c0647eb37eef944fd96c4f517c9f95fc2c51bb5507ef7","first_computed_at":"2026-05-17T23:44:58.620170Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:44:58.620170Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"FS12225gxSleJcj10FGxxQyRw24Xl5uEfUph80lbWEse/EkTyVJXUTU7O9X9qBzH/FU+q1r7ArIsHWNYhyVxAg==","signature_status":"signed_v1","signed_at":"2026-05-17T23:44:58.620812Z","signed_message":"canonical_sha256_bytes"},"source_id":"1811.03056","source_kind":"arxiv","source_version":3}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:87d693d67a6e133496a2c124364634d964f2f19f721e0851a8a47e95ef891b06","sha256:cd973daf564e485b15710e37b5e30853e036efea6d90798dbb19ff1c34126b41"],"state_sha256":"aee8559d983c5c3419c81fb25ba74319227773530c5c8b35206cdc82ce0837d5"}