{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2014:SPP23B4NXX43PTX7LDPYNW6IGG","short_pith_number":"pith:SPP23B4N","canonical_record":{"source":{"id":"1402.0560","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2014-02-04T01:34:25Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"f41f6c1b9e7a07837b9363578bd4fdd2792dd6b22e02accf044640b9cf6bd7b9","abstract_canon_sha256":"b20c8525425e18c532b7758aac3fe2bc8cead0f1bf5f1450139c54b0abd8c463"},"schema_version":"1.0"},"canonical_sha256":"93dfad878dbdf9b7ceff58df86dbc831915034b954706e6ab58b415323cd573c","source":{"kind":"arxiv","id":"1402.0560","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1402.0560","created_at":"2026-05-18T03:00:13Z"},{"alias_kind":"arxiv_version","alias_value":"1402.0560v1","created_at":"2026-05-18T03:00:13Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1402.0560","created_at":"2026-05-18T03:00:13Z"},{"alias_kind":"pith_short_12","alias_value":"SPP23B4NXX43","created_at":"2026-05-18T12:28:49Z"},{"alias_kind":"pith_short_16","alias_value":"SPP23B4NXX43PTX7","created_at":"2026-05-18T12:28:49Z"},{"alias_kind":"pith_short_8","alias_value":"SPP23B4N","created_at":"2026-05-18T12:28:49Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2014:SPP23B4NXX43PTX7LDPYNW6IGG","target":"record","payload":{"canonical_record":{"source":{"id":"1402.0560","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2014-02-04T01:34:25Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"f41f6c1b9e7a07837b9363578bd4fdd2792dd6b22e02accf044640b9cf6bd7b9","abstract_canon_sha256":"b20c8525425e18c532b7758aac3fe2bc8cead0f1bf5f1450139c54b0abd8c463"},"schema_version":"1.0"},"canonical_sha256":"93dfad878dbdf9b7ceff58df86dbc831915034b954706e6ab58b415323cd573c","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T03:00:13.941095Z","signature_b64":"y7X9/PvOPsJLgkhzUT/uv4mnugX6beHNxcZXs8Kkw/R6c6x5ViWlNidqYW1SZCE8SGI39T8Zxnp/xVXH1/AGBw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"93dfad878dbdf9b7ceff58df86dbc831915034b954706e6ab58b415323cd573c","last_reissued_at":"2026-05-18T03:00:13.940296Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T03:00:13.940296Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1402.0560","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T03:00:13Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"+YPl+9JXj3icfMMkzBMUEHy8F3KxkJuhyYEWOWIGcNMkeJQOGVKfL/KkeiFgybaLPsmkBvV/IA6MqT0u6bwBCA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-26T07:09:35.120659Z"},"content_sha256":"2c51443ae8d3dcba2eb76b5ad99de73571c62c385ab121d686d49bb0e3d5ef10","schema_version":"1.0","event_id":"sha256:2c51443ae8d3dcba2eb76b5ad99de73571c62c385ab121d686d49bb0e3d5ef10"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2014:SPP23B4NXX43PTX7LDPYNW6IGG","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Safe Exploration of State and Action Spaces in Reinforcement Learning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.LG","authors_text":"Fernando Fernandez, Javier Garcia","submitted_at":"2014-02-04T01:34:25Z","abstract_excerpt":"In this paper, we consider the important problem of safe exploration in reinforcement learning. While reinforcement learning is well-suited to domains with complex transition dynamics and high-dimensional state-action spaces, an additional challenge is posed by the need for safe and efficient exploration. Traditional exploration techniques are not particularly useful for solving dangerous tasks, where the trial and error process may lead to the selection of actions whose execution in some states may result in damage to the learning system (or any other system). Consequently, when an agent begi"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1402.0560","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T03:00:13Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"gXTpy9VhkVx+oh00whJlHHcvXWxuM+ZY8OkQWOEc4OC8B5EFaDhoQAiYdOWZ2WCLiyvzBaiov2yvvHWXNIuQDg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-26T07:09:35.121119Z"},"content_sha256":"9a13c449d3a56f3c21f8af935b369344883e3dab095c7b7dba5fe4c92aa96025","schema_version":"1.0","event_id":"sha256:9a13c449d3a56f3c21f8af935b369344883e3dab095c7b7dba5fe4c92aa96025"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/SPP23B4NXX43PTX7LDPYNW6IGG/bundle.json","state_url":"https://pith.science/pith/SPP23B4NXX43PTX7LDPYNW6IGG/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/SPP23B4NXX43PTX7LDPYNW6IGG/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-26T07:09:35Z","links":{"resolver":"https://pith.science/pith/SPP23B4NXX43PTX7LDPYNW6IGG","bundle":"https://pith.science/pith/SPP23B4NXX43PTX7LDPYNW6IGG/bundle.json","state":"https://pith.science/pith/SPP23B4NXX43PTX7LDPYNW6IGG/state.json","well_known_bundle":"https://pith.science/.well-known/pith/SPP23B4NXX43PTX7LDPYNW6IGG/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2014:SPP23B4NXX43PTX7LDPYNW6IGG","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"b20c8525425e18c532b7758aac3fe2bc8cead0f1bf5f1450139c54b0abd8c463","cross_cats_sorted":["cs.AI"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2014-02-04T01:34:25Z","title_canon_sha256":"f41f6c1b9e7a07837b9363578bd4fdd2792dd6b22e02accf044640b9cf6bd7b9"},"schema_version":"1.0","source":{"id":"1402.0560","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1402.0560","created_at":"2026-05-18T03:00:13Z"},{"alias_kind":"arxiv_version","alias_value":"1402.0560v1","created_at":"2026-05-18T03:00:13Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1402.0560","created_at":"2026-05-18T03:00:13Z"},{"alias_kind":"pith_short_12","alias_value":"SPP23B4NXX43","created_at":"2026-05-18T12:28:49Z"},{"alias_kind":"pith_short_16","alias_value":"SPP23B4NXX43PTX7","created_at":"2026-05-18T12:28:49Z"},{"alias_kind":"pith_short_8","alias_value":"SPP23B4N","created_at":"2026-05-18T12:28:49Z"}],"graph_snapshots":[{"event_id":"sha256:9a13c449d3a56f3c21f8af935b369344883e3dab095c7b7dba5fe4c92aa96025","target":"graph","created_at":"2026-05-18T03:00:13Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"In this paper, we consider the important problem of safe exploration in reinforcement learning. While reinforcement learning is well-suited to domains with complex transition dynamics and high-dimensional state-action spaces, an additional challenge is posed by the need for safe and efficient exploration. Traditional exploration techniques are not particularly useful for solving dangerous tasks, where the trial and error process may lead to the selection of actions whose execution in some states may result in damage to the learning system (or any other system). Consequently, when an agent begi","authors_text":"Fernando Fernandez, Javier Garcia","cross_cats":["cs.AI"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2014-02-04T01:34:25Z","title":"Safe Exploration of State and Action Spaces in Reinforcement Learning"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1402.0560","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:2c51443ae8d3dcba2eb76b5ad99de73571c62c385ab121d686d49bb0e3d5ef10","target":"record","created_at":"2026-05-18T03:00:13Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"b20c8525425e18c532b7758aac3fe2bc8cead0f1bf5f1450139c54b0abd8c463","cross_cats_sorted":["cs.AI"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2014-02-04T01:34:25Z","title_canon_sha256":"f41f6c1b9e7a07837b9363578bd4fdd2792dd6b22e02accf044640b9cf6bd7b9"},"schema_version":"1.0","source":{"id":"1402.0560","kind":"arxiv","version":1}},"canonical_sha256":"93dfad878dbdf9b7ceff58df86dbc831915034b954706e6ab58b415323cd573c","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"93dfad878dbdf9b7ceff58df86dbc831915034b954706e6ab58b415323cd573c","first_computed_at":"2026-05-18T03:00:13.940296Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T03:00:13.940296Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"y7X9/PvOPsJLgkhzUT/uv4mnugX6beHNxcZXs8Kkw/R6c6x5ViWlNidqYW1SZCE8SGI39T8Zxnp/xVXH1/AGBw==","signature_status":"signed_v1","signed_at":"2026-05-18T03:00:13.941095Z","signed_message":"canonical_sha256_bytes"},"source_id":"1402.0560","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:2c51443ae8d3dcba2eb76b5ad99de73571c62c385ab121d686d49bb0e3d5ef10","sha256:9a13c449d3a56f3c21f8af935b369344883e3dab095c7b7dba5fe4c92aa96025"],"state_sha256":"dfbff017b33d2ebd1ae28750113332188d6be3425d0e25f0d889af68b0f33a92"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"ZWipqHcQ7kFsSeD6bJiz7xTYPTQscQAA0yJNn2q0bleQrszDgG9ozI7oLClR5w4gz7cr8ViF72HTYQtKttlcCw==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-26T07:09:35.123742Z","bundle_sha256":"43d2dd0ede17138c073c39426846b72bdaa8f1d5f8b29d2dfafded28229433dc"}}