{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2017:6ZWYUFCTSJU7V2WIUMAJYZOBDY","short_pith_number":"pith:6ZWYUFCT","canonical_record":{"source":{"id":"1708.02838","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2017-08-09T13:51:47Z","cross_cats_sorted":[],"title_canon_sha256":"e4bcc70d25f8964b00aed4b5e7d2559a358f06c0173ad8765adb3707f21d8188","abstract_canon_sha256":"5b2ecb802942dd619924b44febcfc5336a6fd926b70a88d5f0ef74bf06974ebb"},"schema_version":"1.0"},"canonical_sha256":"f66d8a14539269faeac8a3009c65c11e2e5741bc211a0d262d305033dfd1e7c8","source":{"kind":"arxiv","id":"1708.02838","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1708.02838","created_at":"2026-05-18T00:38:19Z"},{"alias_kind":"arxiv_version","alias_value":"1708.02838v1","created_at":"2026-05-18T00:38:19Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1708.02838","created_at":"2026-05-18T00:38:19Z"},{"alias_kind":"pith_short_12","alias_value":"6ZWYUFCTSJU7","created_at":"2026-05-18T12:31:03Z"},{"alias_kind":"pith_short_16","alias_value":"6ZWYUFCTSJU7V2WI","created_at":"2026-05-18T12:31:03Z"},{"alias_kind":"pith_short_8","alias_value":"6ZWYUFCT","created_at":"2026-05-18T12:31:03Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2017:6ZWYUFCTSJU7V2WIUMAJYZOBDY","target":"record","payload":{"canonical_record":{"source":{"id":"1708.02838","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2017-08-09T13:51:47Z","cross_cats_sorted":[],"title_canon_sha256":"e4bcc70d25f8964b00aed4b5e7d2559a358f06c0173ad8765adb3707f21d8188","abstract_canon_sha256":"5b2ecb802942dd619924b44febcfc5336a6fd926b70a88d5f0ef74bf06974ebb"},"schema_version":"1.0"},"canonical_sha256":"f66d8a14539269faeac8a3009c65c11e2e5741bc211a0d262d305033dfd1e7c8","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:38:19.382199Z","signature_b64":"LHno60HvM6ol2fidZgetufAesjhwGs2Kt2/9Pdk4mh898x9P1rKZPLMC5q+EDUZYXhnZ+Mac1kwbEpsu2LNAAg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"f66d8a14539269faeac8a3009c65c11e2e5741bc211a0d262d305033dfd1e7c8","last_reissued_at":"2026-05-18T00:38:19.381428Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:38:19.381428Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1708.02838","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:38:19Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"+bDb4QWPT6pr2dcq7yvEJbXodD4A8vT7lGnYIUF/7P63OPLHBUBiHzOD4RAvWy+UlW83RcJLbEra5ADtQ/XiBA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-29T20:17:08.665891Z"},"content_sha256":"c8ecfe8a0d37b42ad5952d361f91d4ed51e04cc376a44f033719b7fe5394f85a","schema_version":"1.0","event_id":"sha256:c8ecfe8a0d37b42ad5952d361f91d4ed51e04cc376a44f033719b7fe5394f85a"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2017:6ZWYUFCTSJU7V2WIUMAJYZOBDY","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Decoupled Learning of Environment Characteristics for Safe Exploration","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.AI","authors_text":"Bart Dhoedt, Pieter Simoens, Pieter Van Molle, Sam Leroux, Steven Bohez, Tim Verbelen","submitted_at":"2017-08-09T13:51:47Z","abstract_excerpt":"Reinforcement learning is a proven technique for an agent to learn a task. However, when learning a task using reinforcement learning, the agent cannot distinguish the characteristics of the environment from those of the task. This makes it harder to transfer skills between tasks in the same environment. Furthermore, this does not reduce risk when training for a new task. In this paper, we introduce an approach to decouple the environment characteristics from the task-specific ones, allowing an agent to develop a sense of survival. We evaluate our approach in an environment where an agent must"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1708.02838","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:38:19Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"6HN5oryt3Xk1mplhoQjJrJHwrgVqUxhtzqk50QpBp6+PqrknThSY+BH9/S4XL5aXKnRGDccK5HCPLR9in1r1AA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-29T20:17:08.666233Z"},"content_sha256":"580127c4f1b405ad03605168fa2235b8dc4e8998333b59b4815d414c15a821c3","schema_version":"1.0","event_id":"sha256:580127c4f1b405ad03605168fa2235b8dc4e8998333b59b4815d414c15a821c3"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/6ZWYUFCTSJU7V2WIUMAJYZOBDY/bundle.json","state_url":"https://pith.science/pith/6ZWYUFCTSJU7V2WIUMAJYZOBDY/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/6ZWYUFCTSJU7V2WIUMAJYZOBDY/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-29T20:17:08Z","links":{"resolver":"https://pith.science/pith/6ZWYUFCTSJU7V2WIUMAJYZOBDY","bundle":"https://pith.science/pith/6ZWYUFCTSJU7V2WIUMAJYZOBDY/bundle.json","state":"https://pith.science/pith/6ZWYUFCTSJU7V2WIUMAJYZOBDY/state.json","well_known_bundle":"https://pith.science/.well-known/pith/6ZWYUFCTSJU7V2WIUMAJYZOBDY/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2017:6ZWYUFCTSJU7V2WIUMAJYZOBDY","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"5b2ecb802942dd619924b44febcfc5336a6fd926b70a88d5f0ef74bf06974ebb","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2017-08-09T13:51:47Z","title_canon_sha256":"e4bcc70d25f8964b00aed4b5e7d2559a358f06c0173ad8765adb3707f21d8188"},"schema_version":"1.0","source":{"id":"1708.02838","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1708.02838","created_at":"2026-05-18T00:38:19Z"},{"alias_kind":"arxiv_version","alias_value":"1708.02838v1","created_at":"2026-05-18T00:38:19Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1708.02838","created_at":"2026-05-18T00:38:19Z"},{"alias_kind":"pith_short_12","alias_value":"6ZWYUFCTSJU7","created_at":"2026-05-18T12:31:03Z"},{"alias_kind":"pith_short_16","alias_value":"6ZWYUFCTSJU7V2WI","created_at":"2026-05-18T12:31:03Z"},{"alias_kind":"pith_short_8","alias_value":"6ZWYUFCT","created_at":"2026-05-18T12:31:03Z"}],"graph_snapshots":[{"event_id":"sha256:580127c4f1b405ad03605168fa2235b8dc4e8998333b59b4815d414c15a821c3","target":"graph","created_at":"2026-05-18T00:38:19Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Reinforcement learning is a proven technique for an agent to learn a task. However, when learning a task using reinforcement learning, the agent cannot distinguish the characteristics of the environment from those of the task. This makes it harder to transfer skills between tasks in the same environment. Furthermore, this does not reduce risk when training for a new task. In this paper, we introduce an approach to decouple the environment characteristics from the task-specific ones, allowing an agent to develop a sense of survival. We evaluate our approach in an environment where an agent must","authors_text":"Bart Dhoedt, Pieter Simoens, Pieter Van Molle, Sam Leroux, Steven Bohez, Tim Verbelen","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2017-08-09T13:51:47Z","title":"Decoupled Learning of Environment Characteristics for Safe Exploration"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1708.02838","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:c8ecfe8a0d37b42ad5952d361f91d4ed51e04cc376a44f033719b7fe5394f85a","target":"record","created_at":"2026-05-18T00:38:19Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"5b2ecb802942dd619924b44febcfc5336a6fd926b70a88d5f0ef74bf06974ebb","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2017-08-09T13:51:47Z","title_canon_sha256":"e4bcc70d25f8964b00aed4b5e7d2559a358f06c0173ad8765adb3707f21d8188"},"schema_version":"1.0","source":{"id":"1708.02838","kind":"arxiv","version":1}},"canonical_sha256":"f66d8a14539269faeac8a3009c65c11e2e5741bc211a0d262d305033dfd1e7c8","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"f66d8a14539269faeac8a3009c65c11e2e5741bc211a0d262d305033dfd1e7c8","first_computed_at":"2026-05-18T00:38:19.381428Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:38:19.381428Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"LHno60HvM6ol2fidZgetufAesjhwGs2Kt2/9Pdk4mh898x9P1rKZPLMC5q+EDUZYXhnZ+Mac1kwbEpsu2LNAAg==","signature_status":"signed_v1","signed_at":"2026-05-18T00:38:19.382199Z","signed_message":"canonical_sha256_bytes"},"source_id":"1708.02838","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:c8ecfe8a0d37b42ad5952d361f91d4ed51e04cc376a44f033719b7fe5394f85a","sha256:580127c4f1b405ad03605168fa2235b8dc4e8998333b59b4815d414c15a821c3"],"state_sha256":"2017f35cfd964161509d5cf4537006c24d26563b91416a2039d5a5c7fc3208ec"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"dz5OIGecJZpHb7a2XUkjVDrG32qfamSmDTTO+FNSYVdJ2Y2qXaXFj1dg3rQxz3aVFbOZUfzewivdPGVgpbB/Cw==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-29T20:17:08.668184Z","bundle_sha256":"a2a2ed065a474f39f53f13d05c9d9643ab6dafbb16298e1390c215f2649c2531"}}