{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2019:ULDQWYWXZ5NZSGCFFZPXX6D4V4","short_pith_number":"pith:ULDQWYWX","schema_version":"1.0","canonical_sha256":"a2c70b62d7cf5b9918452e5f7bf87caf274b12de5ab74b2e6bd7318f2555ef6f","source":{"kind":"arxiv","id":"1903.02526","version":2},"attestation_state":"computed","paper":{"title":"Safety-Guided Deep Reinforcement Learning via Online Gaussian Process Estimation","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","cs.RO"],"primary_cat":"cs.LG","authors_text":"Jiameng Fan, Wenchao Li","submitted_at":"2019-03-06T18:02:08Z","abstract_excerpt":"An important facet of reinforcement learning (RL) has to do with how the agent goes about exploring the environment. Traditional exploration strategies typically focus on efficiency and ignore safety. However, for practical applications, ensuring safety of the agent during exploration is crucial since performing an unsafe action or reaching an unsafe state could result in irreversible damage to the agent. The main challenge of safe exploration is that characterizing the unsafe states and actions is difficult for large continuous state or action spaces and unknown environments. In this paper, w"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1903.02526","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-03-06T18:02:08Z","cross_cats_sorted":["cs.AI","cs.RO"],"title_canon_sha256":"14e39b90df7bbab88d76a2f3891ae2016586746c63b13711b30b96c833379147","abstract_canon_sha256":"146ceb03a049577c84e4ca638d114a46e8cba646a0d161fc3df30b82d2a843ba"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:48:04.517100Z","signature_b64":"ctuPvUkAvMExAaO//rJxK4iiqatYJNCW+9/MIall7JOYLmDiEVL6Qc4YTlqKWr4BhFGkMIPS6vnzVNO6qAmpDw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"a2c70b62d7cf5b9918452e5f7bf87caf274b12de5ab74b2e6bd7318f2555ef6f","last_reissued_at":"2026-05-17T23:48:04.516698Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:48:04.516698Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Safety-Guided Deep Reinforcement Learning via Online Gaussian Process Estimation","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","cs.RO"],"primary_cat":"cs.LG","authors_text":"Jiameng Fan, Wenchao Li","submitted_at":"2019-03-06T18:02:08Z","abstract_excerpt":"An important facet of reinforcement learning (RL) has to do with how the agent goes about exploring the environment. Traditional exploration strategies typically focus on efficiency and ignore safety. However, for practical applications, ensuring safety of the agent during exploration is crucial since performing an unsafe action or reaching an unsafe state could result in irreversible damage to the agent. The main challenge of safe exploration is that characterizing the unsafe states and actions is difficult for large continuous state or action spaces and unknown environments. In this paper, w"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1903.02526","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1903.02526","created_at":"2026-05-17T23:48:04.516770+00:00"},{"alias_kind":"arxiv_version","alias_value":"1903.02526v2","created_at":"2026-05-17T23:48:04.516770+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1903.02526","created_at":"2026-05-17T23:48:04.516770+00:00"},{"alias_kind":"pith_short_12","alias_value":"ULDQWYWXZ5NZ","created_at":"2026-05-18T12:33:30.264802+00:00"},{"alias_kind":"pith_short_16","alias_value":"ULDQWYWXZ5NZSGCF","created_at":"2026-05-18T12:33:30.264802+00:00"},{"alias_kind":"pith_short_8","alias_value":"ULDQWYWX","created_at":"2026-05-18T12:33:30.264802+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/ULDQWYWXZ5NZSGCFFZPXX6D4V4","json":"https://pith.science/pith/ULDQWYWXZ5NZSGCFFZPXX6D4V4.json","graph_json":"https://pith.science/api/pith-number/ULDQWYWXZ5NZSGCFFZPXX6D4V4/graph.json","events_json":"https://pith.science/api/pith-number/ULDQWYWXZ5NZSGCFFZPXX6D4V4/events.json","paper":"https://pith.science/paper/ULDQWYWX"},"agent_actions":{"view_html":"https://pith.science/pith/ULDQWYWXZ5NZSGCFFZPXX6D4V4","download_json":"https://pith.science/pith/ULDQWYWXZ5NZSGCFFZPXX6D4V4.json","view_paper":"https://pith.science/paper/ULDQWYWX","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1903.02526&json=true","fetch_graph":"https://pith.science/api/pith-number/ULDQWYWXZ5NZSGCFFZPXX6D4V4/graph.json","fetch_events":"https://pith.science/api/pith-number/ULDQWYWXZ5NZSGCFFZPXX6D4V4/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/ULDQWYWXZ5NZSGCFFZPXX6D4V4/action/timestamp_anchor","attest_storage":"https://pith.science/pith/ULDQWYWXZ5NZSGCFFZPXX6D4V4/action/storage_attestation","attest_author":"https://pith.science/pith/ULDQWYWXZ5NZSGCFFZPXX6D4V4/action/author_attestation","sign_citation":"https://pith.science/pith/ULDQWYWXZ5NZSGCFFZPXX6D4V4/action/citation_signature","submit_replication":"https://pith.science/pith/ULDQWYWXZ5NZSGCFFZPXX6D4V4/action/replication_record"}},"created_at":"2026-05-17T23:48:04.516770+00:00","updated_at":"2026-05-17T23:48:04.516770+00:00"}