{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2019:ULDQWYWXZ5NZSGCFFZPXX6D4V4","short_pith_number":"pith:ULDQWYWX","canonical_record":{"source":{"id":"1903.02526","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-03-06T18:02:08Z","cross_cats_sorted":["cs.AI","cs.RO"],"title_canon_sha256":"14e39b90df7bbab88d76a2f3891ae2016586746c63b13711b30b96c833379147","abstract_canon_sha256":"146ceb03a049577c84e4ca638d114a46e8cba646a0d161fc3df30b82d2a843ba"},"schema_version":"1.0"},"canonical_sha256":"a2c70b62d7cf5b9918452e5f7bf87caf274b12de5ab74b2e6bd7318f2555ef6f","source":{"kind":"arxiv","id":"1903.02526","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1903.02526","created_at":"2026-05-17T23:48:04Z"},{"alias_kind":"arxiv_version","alias_value":"1903.02526v2","created_at":"2026-05-17T23:48:04Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1903.02526","created_at":"2026-05-17T23:48:04Z"},{"alias_kind":"pith_short_12","alias_value":"ULDQWYWXZ5NZ","created_at":"2026-05-18T12:33:30Z"},{"alias_kind":"pith_short_16","alias_value":"ULDQWYWXZ5NZSGCF","created_at":"2026-05-18T12:33:30Z"},{"alias_kind":"pith_short_8","alias_value":"ULDQWYWX","created_at":"2026-05-18T12:33:30Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2019:ULDQWYWXZ5NZSGCFFZPXX6D4V4","target":"record","payload":{"canonical_record":{"source":{"id":"1903.02526","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-03-06T18:02:08Z","cross_cats_sorted":["cs.AI","cs.RO"],"title_canon_sha256":"14e39b90df7bbab88d76a2f3891ae2016586746c63b13711b30b96c833379147","abstract_canon_sha256":"146ceb03a049577c84e4ca638d114a46e8cba646a0d161fc3df30b82d2a843ba"},"schema_version":"1.0"},"canonical_sha256":"a2c70b62d7cf5b9918452e5f7bf87caf274b12de5ab74b2e6bd7318f2555ef6f","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:48:04.517100Z","signature_b64":"ctuPvUkAvMExAaO//rJxK4iiqatYJNCW+9/MIall7JOYLmDiEVL6Qc4YTlqKWr4BhFGkMIPS6vnzVNO6qAmpDw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"a2c70b62d7cf5b9918452e5f7bf87caf274b12de5ab74b2e6bd7318f2555ef6f","last_reissued_at":"2026-05-17T23:48:04.516698Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:48:04.516698Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1903.02526","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:48:04Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"dADV0kak2qYt7nGFgBwLkA8HVPfjSP0AJRxLAVWsOvKa2nZajIPvOmCsqb+Hj8YnHs5KtP1GWAetkDLw01y1AQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-27T01:55:56.096531Z"},"content_sha256":"dac4a690f386af76c8d2280cae95eda382f1f3ddad1b6a6324981b056e296706","schema_version":"1.0","event_id":"sha256:dac4a690f386af76c8d2280cae95eda382f1f3ddad1b6a6324981b056e296706"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2019:ULDQWYWXZ5NZSGCFFZPXX6D4V4","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Safety-Guided Deep Reinforcement Learning via Online Gaussian Process Estimation","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","cs.RO"],"primary_cat":"cs.LG","authors_text":"Jiameng Fan, Wenchao Li","submitted_at":"2019-03-06T18:02:08Z","abstract_excerpt":"An important facet of reinforcement learning (RL) has to do with how the agent goes about exploring the environment. Traditional exploration strategies typically focus on efficiency and ignore safety. However, for practical applications, ensuring safety of the agent during exploration is crucial since performing an unsafe action or reaching an unsafe state could result in irreversible damage to the agent. The main challenge of safe exploration is that characterizing the unsafe states and actions is difficult for large continuous state or action spaces and unknown environments. In this paper, w"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1903.02526","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:48:04Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"aXE1vmn2dTv4wtLtCyi4GH241CTKokXpwLoHhukoWKK6nBnL7iCg4zEygXvdYjM+/TUE0Aa3SnOukeG9MI1GDw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-27T01:55:56.096936Z"},"content_sha256":"46462fe09b546799567f3ac26682a907429ca8b1c27a0f63b45844cbe050f96d","schema_version":"1.0","event_id":"sha256:46462fe09b546799567f3ac26682a907429ca8b1c27a0f63b45844cbe050f96d"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/ULDQWYWXZ5NZSGCFFZPXX6D4V4/bundle.json","state_url":"https://pith.science/pith/ULDQWYWXZ5NZSGCFFZPXX6D4V4/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/ULDQWYWXZ5NZSGCFFZPXX6D4V4/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-27T01:55:56Z","links":{"resolver":"https://pith.science/pith/ULDQWYWXZ5NZSGCFFZPXX6D4V4","bundle":"https://pith.science/pith/ULDQWYWXZ5NZSGCFFZPXX6D4V4/bundle.json","state":"https://pith.science/pith/ULDQWYWXZ5NZSGCFFZPXX6D4V4/state.json","well_known_bundle":"https://pith.science/.well-known/pith/ULDQWYWXZ5NZSGCFFZPXX6D4V4/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2019:ULDQWYWXZ5NZSGCFFZPXX6D4V4","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"146ceb03a049577c84e4ca638d114a46e8cba646a0d161fc3df30b82d2a843ba","cross_cats_sorted":["cs.AI","cs.RO"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-03-06T18:02:08Z","title_canon_sha256":"14e39b90df7bbab88d76a2f3891ae2016586746c63b13711b30b96c833379147"},"schema_version":"1.0","source":{"id":"1903.02526","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1903.02526","created_at":"2026-05-17T23:48:04Z"},{"alias_kind":"arxiv_version","alias_value":"1903.02526v2","created_at":"2026-05-17T23:48:04Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1903.02526","created_at":"2026-05-17T23:48:04Z"},{"alias_kind":"pith_short_12","alias_value":"ULDQWYWXZ5NZ","created_at":"2026-05-18T12:33:30Z"},{"alias_kind":"pith_short_16","alias_value":"ULDQWYWXZ5NZSGCF","created_at":"2026-05-18T12:33:30Z"},{"alias_kind":"pith_short_8","alias_value":"ULDQWYWX","created_at":"2026-05-18T12:33:30Z"}],"graph_snapshots":[{"event_id":"sha256:46462fe09b546799567f3ac26682a907429ca8b1c27a0f63b45844cbe050f96d","target":"graph","created_at":"2026-05-17T23:48:04Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"An important facet of reinforcement learning (RL) has to do with how the agent goes about exploring the environment. Traditional exploration strategies typically focus on efficiency and ignore safety. However, for practical applications, ensuring safety of the agent during exploration is crucial since performing an unsafe action or reaching an unsafe state could result in irreversible damage to the agent. The main challenge of safe exploration is that characterizing the unsafe states and actions is difficult for large continuous state or action spaces and unknown environments. In this paper, w","authors_text":"Jiameng Fan, Wenchao Li","cross_cats":["cs.AI","cs.RO"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-03-06T18:02:08Z","title":"Safety-Guided Deep Reinforcement Learning via Online Gaussian Process Estimation"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1903.02526","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:dac4a690f386af76c8d2280cae95eda382f1f3ddad1b6a6324981b056e296706","target":"record","created_at":"2026-05-17T23:48:04Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"146ceb03a049577c84e4ca638d114a46e8cba646a0d161fc3df30b82d2a843ba","cross_cats_sorted":["cs.AI","cs.RO"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-03-06T18:02:08Z","title_canon_sha256":"14e39b90df7bbab88d76a2f3891ae2016586746c63b13711b30b96c833379147"},"schema_version":"1.0","source":{"id":"1903.02526","kind":"arxiv","version":2}},"canonical_sha256":"a2c70b62d7cf5b9918452e5f7bf87caf274b12de5ab74b2e6bd7318f2555ef6f","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"a2c70b62d7cf5b9918452e5f7bf87caf274b12de5ab74b2e6bd7318f2555ef6f","first_computed_at":"2026-05-17T23:48:04.516698Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:48:04.516698Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"ctuPvUkAvMExAaO//rJxK4iiqatYJNCW+9/MIall7JOYLmDiEVL6Qc4YTlqKWr4BhFGkMIPS6vnzVNO6qAmpDw==","signature_status":"signed_v1","signed_at":"2026-05-17T23:48:04.517100Z","signed_message":"canonical_sha256_bytes"},"source_id":"1903.02526","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:dac4a690f386af76c8d2280cae95eda382f1f3ddad1b6a6324981b056e296706","sha256:46462fe09b546799567f3ac26682a907429ca8b1c27a0f63b45844cbe050f96d"],"state_sha256":"5d34ba28ce3d4e36067a4245c3d5cb14070bccbb0a9f0c57aefa688dc0d02225"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"iiqYHBwILVZEgRUFtnmAHUNJ6R1vLXQs99BQVFY+M9y0bWif0b1n3/yglCrpn5mQW03H+n/JTg8kLr0BSI0/DQ==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-27T01:55:56.099627Z","bundle_sha256":"396e59d43ebd3d02ee003749e4e8035c06e262cb89a70007207e6d1a649969d6"}}