{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2015:HQSCQCUGQSVGCKUVWTJKXKHVZB","short_pith_number":"pith:HQSCQCUG","canonical_record":{"source":{"id":"1510.05880","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.SE","submitted_at":"2015-10-20T13:20:01Z","cross_cats_sorted":["cs.SY"],"title_canon_sha256":"65d93990a6f8fb26c290f4c0b7634116faad801320e2e93da0af0feb212076b7","abstract_canon_sha256":"69c321364acee1d7a1f071ba8c8e7a2f5ab122ed0209a01a946c6fc056895b63"},"schema_version":"1.0"},"canonical_sha256":"3c24280a8684aa612a95b4d2aba8f5c863d92208be0eba598fd34e0cfb693364","source":{"kind":"arxiv","id":"1510.05880","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1510.05880","created_at":"2026-05-18T01:29:39Z"},{"alias_kind":"arxiv_version","alias_value":"1510.05880v1","created_at":"2026-05-18T01:29:39Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1510.05880","created_at":"2026-05-18T01:29:39Z"},{"alias_kind":"pith_short_12","alias_value":"HQSCQCUGQSVG","created_at":"2026-05-18T12:29:25Z"},{"alias_kind":"pith_short_16","alias_value":"HQSCQCUGQSVGCKUV","created_at":"2026-05-18T12:29:25Z"},{"alias_kind":"pith_short_8","alias_value":"HQSCQCUG","created_at":"2026-05-18T12:29:25Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2015:HQSCQCUGQSVGCKUVWTJKXKHVZB","target":"record","payload":{"canonical_record":{"source":{"id":"1510.05880","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.SE","submitted_at":"2015-10-20T13:20:01Z","cross_cats_sorted":["cs.SY"],"title_canon_sha256":"65d93990a6f8fb26c290f4c0b7634116faad801320e2e93da0af0feb212076b7","abstract_canon_sha256":"69c321364acee1d7a1f071ba8c8e7a2f5ab122ed0209a01a946c6fc056895b63"},"schema_version":"1.0"},"canonical_sha256":"3c24280a8684aa612a95b4d2aba8f5c863d92208be0eba598fd34e0cfb693364","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T01:29:39.457744Z","signature_b64":"5jQFEiE4UmWIXnZN0rMF7gPVIbjzaCI/ZnUyNFP2cgasW4oUl5W9hoaIjbsdj0pQhn62bnI560RYiCZTi8yYDA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"3c24280a8684aa612a95b4d2aba8f5c863d92208be0eba598fd34e0cfb693364","last_reissued_at":"2026-05-18T01:29:39.457115Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T01:29:39.457115Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1510.05880","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T01:29:39Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"S/Y+ky01+NgC8585vlpbxhoyngBBR6BOMbEK9bOcP86Qtque8g1Cyz662R26TtrlAmiYG2PYYUOgQ1CD9Y4BBA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-23T11:34:59.533717Z"},"content_sha256":"3b82fe56294138b2ebaa5709fe3012fc1c80aaea8885f189d9b07e4756458bd9","schema_version":"1.0","event_id":"sha256:3b82fe56294138b2ebaa5709fe3012fc1c80aaea8885f189d9b07e4756458bd9"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2015:HQSCQCUGQSVGCKUVWTJKXKHVZB","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Safety-Constrained Reinforcement Learning for MDPs","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.SY"],"primary_cat":"cs.SE","authors_text":"Christian Dehnert, Joost-Pieter Katoen, Nils Jansen, Sebastian Junges, Ufuk Topcu","submitted_at":"2015-10-20T13:20:01Z","abstract_excerpt":"We consider controller synthesis for stochastic and partially unknown environments in which safety is essential. Specifically, we abstract the problem as a Markov decision process in which the expected performance is measured using a cost function that is unknown prior to run-time exploration of the state space. Standard learning approaches synthesize cost-optimal strategies without guaranteeing safety properties. To remedy this, we first compute safe, permissive strategies. Then, exploration is constrained to these strategies and thereby meets the imposed safety requirements. Exploiting an it"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1510.05880","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T01:29:39Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"7sCO7uAJ54APKq8RvVv2/sgT7hmjYmwdgWy+yDISrp7gGUK7ukORbUgixDCTpquUTkLoj0CNTEZ7RThM/NKqBw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-23T11:34:59.534475Z"},"content_sha256":"cd99d8695aaa3c8d6a0b3c79c585cb2245188a26edcd086d5354db09b4015de6","schema_version":"1.0","event_id":"sha256:cd99d8695aaa3c8d6a0b3c79c585cb2245188a26edcd086d5354db09b4015de6"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/HQSCQCUGQSVGCKUVWTJKXKHVZB/bundle.json","state_url":"https://pith.science/pith/HQSCQCUGQSVGCKUVWTJKXKHVZB/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/HQSCQCUGQSVGCKUVWTJKXKHVZB/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-23T11:34:59Z","links":{"resolver":"https://pith.science/pith/HQSCQCUGQSVGCKUVWTJKXKHVZB","bundle":"https://pith.science/pith/HQSCQCUGQSVGCKUVWTJKXKHVZB/bundle.json","state":"https://pith.science/pith/HQSCQCUGQSVGCKUVWTJKXKHVZB/state.json","well_known_bundle":"https://pith.science/.well-known/pith/HQSCQCUGQSVGCKUVWTJKXKHVZB/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2015:HQSCQCUGQSVGCKUVWTJKXKHVZB","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"69c321364acee1d7a1f071ba8c8e7a2f5ab122ed0209a01a946c6fc056895b63","cross_cats_sorted":["cs.SY"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.SE","submitted_at":"2015-10-20T13:20:01Z","title_canon_sha256":"65d93990a6f8fb26c290f4c0b7634116faad801320e2e93da0af0feb212076b7"},"schema_version":"1.0","source":{"id":"1510.05880","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1510.05880","created_at":"2026-05-18T01:29:39Z"},{"alias_kind":"arxiv_version","alias_value":"1510.05880v1","created_at":"2026-05-18T01:29:39Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1510.05880","created_at":"2026-05-18T01:29:39Z"},{"alias_kind":"pith_short_12","alias_value":"HQSCQCUGQSVG","created_at":"2026-05-18T12:29:25Z"},{"alias_kind":"pith_short_16","alias_value":"HQSCQCUGQSVGCKUV","created_at":"2026-05-18T12:29:25Z"},{"alias_kind":"pith_short_8","alias_value":"HQSCQCUG","created_at":"2026-05-18T12:29:25Z"}],"graph_snapshots":[{"event_id":"sha256:cd99d8695aaa3c8d6a0b3c79c585cb2245188a26edcd086d5354db09b4015de6","target":"graph","created_at":"2026-05-18T01:29:39Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"We consider controller synthesis for stochastic and partially unknown environments in which safety is essential. Specifically, we abstract the problem as a Markov decision process in which the expected performance is measured using a cost function that is unknown prior to run-time exploration of the state space. Standard learning approaches synthesize cost-optimal strategies without guaranteeing safety properties. To remedy this, we first compute safe, permissive strategies. Then, exploration is constrained to these strategies and thereby meets the imposed safety requirements. Exploiting an it","authors_text":"Christian Dehnert, Joost-Pieter Katoen, Nils Jansen, Sebastian Junges, Ufuk Topcu","cross_cats":["cs.SY"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.SE","submitted_at":"2015-10-20T13:20:01Z","title":"Safety-Constrained Reinforcement Learning for MDPs"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1510.05880","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:3b82fe56294138b2ebaa5709fe3012fc1c80aaea8885f189d9b07e4756458bd9","target":"record","created_at":"2026-05-18T01:29:39Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"69c321364acee1d7a1f071ba8c8e7a2f5ab122ed0209a01a946c6fc056895b63","cross_cats_sorted":["cs.SY"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.SE","submitted_at":"2015-10-20T13:20:01Z","title_canon_sha256":"65d93990a6f8fb26c290f4c0b7634116faad801320e2e93da0af0feb212076b7"},"schema_version":"1.0","source":{"id":"1510.05880","kind":"arxiv","version":1}},"canonical_sha256":"3c24280a8684aa612a95b4d2aba8f5c863d92208be0eba598fd34e0cfb693364","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"3c24280a8684aa612a95b4d2aba8f5c863d92208be0eba598fd34e0cfb693364","first_computed_at":"2026-05-18T01:29:39.457115Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T01:29:39.457115Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"5jQFEiE4UmWIXnZN0rMF7gPVIbjzaCI/ZnUyNFP2cgasW4oUl5W9hoaIjbsdj0pQhn62bnI560RYiCZTi8yYDA==","signature_status":"signed_v1","signed_at":"2026-05-18T01:29:39.457744Z","signed_message":"canonical_sha256_bytes"},"source_id":"1510.05880","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:3b82fe56294138b2ebaa5709fe3012fc1c80aaea8885f189d9b07e4756458bd9","sha256:cd99d8695aaa3c8d6a0b3c79c585cb2245188a26edcd086d5354db09b4015de6"],"state_sha256":"b7c86acacfb30664541d7149af55296c6c6be4d35d2c90c4dc5089c09d44fbb3"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"weE90eGjBaZ981m8MIqui+Hu3J9CgVGy84KMuL3YFnnDC0mYMsKHvZMwSh4ufV35b5JfAUkRSnukU9UGbVePBg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-23T11:34:59.538217Z","bundle_sha256":"8c3581c6ed7918a096242a47cbb1daefd4c34c069af2cbee9342e8718fcfaea5"}}