{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2017:5JBM562L6DL67OM3Y4PSXWQSK4","short_pith_number":"pith:5JBM562L","canonical_record":{"source":{"id":"1708.08611","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LO","submitted_at":"2017-08-29T07:16:54Z","cross_cats_sorted":["cs.AI","cs.LG"],"title_canon_sha256":"ca7cabcea99ff5e209ea6c3a37e7880bc371aef794e7ae09ca121a2f49d402a7","abstract_canon_sha256":"03b412be11f6ce40af40159d22b0d22f2e807a1391f2fe421d3e1453d43542cf"},"schema_version":"1.0"},"canonical_sha256":"ea42cefb4bf0d7efb99bc71f2bda1257225bc38444a9bf7d4572bc4416a5e290","source":{"kind":"arxiv","id":"1708.08611","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1708.08611","created_at":"2026-05-18T00:36:08Z"},{"alias_kind":"arxiv_version","alias_value":"1708.08611v2","created_at":"2026-05-18T00:36:08Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1708.08611","created_at":"2026-05-18T00:36:08Z"},{"alias_kind":"pith_short_12","alias_value":"5JBM562L6DL6","created_at":"2026-05-18T12:31:00Z"},{"alias_kind":"pith_short_16","alias_value":"5JBM562L6DL67OM3","created_at":"2026-05-18T12:31:00Z"},{"alias_kind":"pith_short_8","alias_value":"5JBM562L","created_at":"2026-05-18T12:31:00Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2017:5JBM562L6DL67OM3Y4PSXWQSK4","target":"record","payload":{"canonical_record":{"source":{"id":"1708.08611","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LO","submitted_at":"2017-08-29T07:16:54Z","cross_cats_sorted":["cs.AI","cs.LG"],"title_canon_sha256":"ca7cabcea99ff5e209ea6c3a37e7880bc371aef794e7ae09ca121a2f49d402a7","abstract_canon_sha256":"03b412be11f6ce40af40159d22b0d22f2e807a1391f2fe421d3e1453d43542cf"},"schema_version":"1.0"},"canonical_sha256":"ea42cefb4bf0d7efb99bc71f2bda1257225bc38444a9bf7d4572bc4416a5e290","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:36:08.187868Z","signature_b64":"JNqSpesjHDTl5g6ssE8+7U6diZFK2GIli7YdyYHTMkNKTAa4AQKaTo2vx3b24SzfrNpVwF2Rpw4YeyYUj8CBCg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"ea42cefb4bf0d7efb99bc71f2bda1257225bc38444a9bf7d4572bc4416a5e290","last_reissued_at":"2026-05-18T00:36:08.187138Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:36:08.187138Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1708.08611","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:36:08Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"btQ623TEEvxsyfI4WqAup9IL5BjvAIT3tWa3IdYEM0sA8Lxh2tRI3QNPKSR5pspPE6AB59VzUo/yprIHXlp5Aw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-28T10:03:41.948479Z"},"content_sha256":"7b145f5f85f90431936baf0bf6b9aeac3e5d6ac85c99d666dac6d2f1e697c726","schema_version":"1.0","event_id":"sha256:7b145f5f85f90431936baf0bf6b9aeac3e5d6ac85c99d666dac6d2f1e697c726"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2017:5JBM562L6DL67OM3Y4PSXWQSK4","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Safe Reinforcement Learning via Shielding","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","cs.LG"],"primary_cat":"cs.LO","authors_text":"Bettina K\\\"onighofer, Mohammed Alshiekh, Roderick Bloem, Ruediger Ehlers, Scott Niekum, Ufuk Topcu","submitted_at":"2017-08-29T07:16:54Z","abstract_excerpt":"Reinforcement learning algorithms discover policies that maximize reward, but do not necessarily guarantee safety during learning or execution phases. We introduce a new approach to learn optimal policies while enforcing properties expressed in temporal logic. To this end, given the temporal logic specification that is to be obeyed by the learning system, we propose to synthesize a reactive system called a shield. The shield is introduced in the traditional learning process in two alternative ways, depending on the location at which the shield is implemented. In the first one, the shield acts "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1708.08611","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:36:08Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"GBKa+SHGauHLiMmZOsdnh/pY4MW+49Bph/e10qgDKP3uZFAXkd0+Xa75f82hpT+JgSHm+ax+ULWsEv500V8EBQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-28T10:03:41.948829Z"},"content_sha256":"0df5e091c495e77f72aa79fecbedef43dcbadaead248ce156af778080f1bf742","schema_version":"1.0","event_id":"sha256:0df5e091c495e77f72aa79fecbedef43dcbadaead248ce156af778080f1bf742"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/5JBM562L6DL67OM3Y4PSXWQSK4/bundle.json","state_url":"https://pith.science/pith/5JBM562L6DL67OM3Y4PSXWQSK4/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/5JBM562L6DL67OM3Y4PSXWQSK4/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-28T10:03:41Z","links":{"resolver":"https://pith.science/pith/5JBM562L6DL67OM3Y4PSXWQSK4","bundle":"https://pith.science/pith/5JBM562L6DL67OM3Y4PSXWQSK4/bundle.json","state":"https://pith.science/pith/5JBM562L6DL67OM3Y4PSXWQSK4/state.json","well_known_bundle":"https://pith.science/.well-known/pith/5JBM562L6DL67OM3Y4PSXWQSK4/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2017:5JBM562L6DL67OM3Y4PSXWQSK4","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"03b412be11f6ce40af40159d22b0d22f2e807a1391f2fe421d3e1453d43542cf","cross_cats_sorted":["cs.AI","cs.LG"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LO","submitted_at":"2017-08-29T07:16:54Z","title_canon_sha256":"ca7cabcea99ff5e209ea6c3a37e7880bc371aef794e7ae09ca121a2f49d402a7"},"schema_version":"1.0","source":{"id":"1708.08611","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1708.08611","created_at":"2026-05-18T00:36:08Z"},{"alias_kind":"arxiv_version","alias_value":"1708.08611v2","created_at":"2026-05-18T00:36:08Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1708.08611","created_at":"2026-05-18T00:36:08Z"},{"alias_kind":"pith_short_12","alias_value":"5JBM562L6DL6","created_at":"2026-05-18T12:31:00Z"},{"alias_kind":"pith_short_16","alias_value":"5JBM562L6DL67OM3","created_at":"2026-05-18T12:31:00Z"},{"alias_kind":"pith_short_8","alias_value":"5JBM562L","created_at":"2026-05-18T12:31:00Z"}],"graph_snapshots":[{"event_id":"sha256:0df5e091c495e77f72aa79fecbedef43dcbadaead248ce156af778080f1bf742","target":"graph","created_at":"2026-05-18T00:36:08Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Reinforcement learning algorithms discover policies that maximize reward, but do not necessarily guarantee safety during learning or execution phases. We introduce a new approach to learn optimal policies while enforcing properties expressed in temporal logic. To this end, given the temporal logic specification that is to be obeyed by the learning system, we propose to synthesize a reactive system called a shield. The shield is introduced in the traditional learning process in two alternative ways, depending on the location at which the shield is implemented. In the first one, the shield acts ","authors_text":"Bettina K\\\"onighofer, Mohammed Alshiekh, Roderick Bloem, Ruediger Ehlers, Scott Niekum, Ufuk Topcu","cross_cats":["cs.AI","cs.LG"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LO","submitted_at":"2017-08-29T07:16:54Z","title":"Safe Reinforcement Learning via Shielding"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1708.08611","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:7b145f5f85f90431936baf0bf6b9aeac3e5d6ac85c99d666dac6d2f1e697c726","target":"record","created_at":"2026-05-18T00:36:08Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"03b412be11f6ce40af40159d22b0d22f2e807a1391f2fe421d3e1453d43542cf","cross_cats_sorted":["cs.AI","cs.LG"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LO","submitted_at":"2017-08-29T07:16:54Z","title_canon_sha256":"ca7cabcea99ff5e209ea6c3a37e7880bc371aef794e7ae09ca121a2f49d402a7"},"schema_version":"1.0","source":{"id":"1708.08611","kind":"arxiv","version":2}},"canonical_sha256":"ea42cefb4bf0d7efb99bc71f2bda1257225bc38444a9bf7d4572bc4416a5e290","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"ea42cefb4bf0d7efb99bc71f2bda1257225bc38444a9bf7d4572bc4416a5e290","first_computed_at":"2026-05-18T00:36:08.187138Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:36:08.187138Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"JNqSpesjHDTl5g6ssE8+7U6diZFK2GIli7YdyYHTMkNKTAa4AQKaTo2vx3b24SzfrNpVwF2Rpw4YeyYUj8CBCg==","signature_status":"signed_v1","signed_at":"2026-05-18T00:36:08.187868Z","signed_message":"canonical_sha256_bytes"},"source_id":"1708.08611","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:7b145f5f85f90431936baf0bf6b9aeac3e5d6ac85c99d666dac6d2f1e697c726","sha256:0df5e091c495e77f72aa79fecbedef43dcbadaead248ce156af778080f1bf742"],"state_sha256":"9131767cb5c9c10729ef905fc940cc51e118e2b84557b0f808151cc011947c8e"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"BqG7hNFYnCSv2CqxPZRGkIN8IT7weCIDb31oWvik9LTfS+tzd3e2WE721AAdXX5kwV/Huqgh07KjZ5XiHfEHBw==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-28T10:03:41.950756Z","bundle_sha256":"318ac02c41bc6a26c8a96b5a3087573449de8b612e6cc8aafba9313ed198a999"}}