{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2017:RLLYJDLNTPBTNW3BTRMLKVJJR4","short_pith_number":"pith:RLLYJDLN","canonical_record":{"source":{"id":"1707.05173","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2017-07-17T14:13:40Z","cross_cats_sorted":["cs.LG","cs.NE"],"title_canon_sha256":"3b611b882b96044f0671e9e778f53f21a3a1fb62f6eef3acc30096251bb78832","abstract_canon_sha256":"94f7e54319f9409af9f0ce1e5813ed0d966f5265ea3f6c2674b8be593df5520c"},"schema_version":"1.0"},"canonical_sha256":"8ad7848d6d9bc336db619c58b555298f12ef629f48aa9738646f5d5b6f3311ae","source":{"kind":"arxiv","id":"1707.05173","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1707.05173","created_at":"2026-05-18T00:40:09Z"},{"alias_kind":"arxiv_version","alias_value":"1707.05173v1","created_at":"2026-05-18T00:40:09Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1707.05173","created_at":"2026-05-18T00:40:09Z"},{"alias_kind":"pith_short_12","alias_value":"RLLYJDLNTPBT","created_at":"2026-05-18T12:31:39Z"},{"alias_kind":"pith_short_16","alias_value":"RLLYJDLNTPBTNW3B","created_at":"2026-05-18T12:31:39Z"},{"alias_kind":"pith_short_8","alias_value":"RLLYJDLN","created_at":"2026-05-18T12:31:39Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2017:RLLYJDLNTPBTNW3BTRMLKVJJR4","target":"record","payload":{"canonical_record":{"source":{"id":"1707.05173","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2017-07-17T14:13:40Z","cross_cats_sorted":["cs.LG","cs.NE"],"title_canon_sha256":"3b611b882b96044f0671e9e778f53f21a3a1fb62f6eef3acc30096251bb78832","abstract_canon_sha256":"94f7e54319f9409af9f0ce1e5813ed0d966f5265ea3f6c2674b8be593df5520c"},"schema_version":"1.0"},"canonical_sha256":"8ad7848d6d9bc336db619c58b555298f12ef629f48aa9738646f5d5b6f3311ae","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:40:09.258602Z","signature_b64":"kTQyPPBiiXqy/TyeZUpgw5y+RMZgejkCdOO5B0bcjfkO1HIrLmaJ6qZnNWFvyez/Cem5oyTJ6QcNZqqSR//lAg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"8ad7848d6d9bc336db619c58b555298f12ef629f48aa9738646f5d5b6f3311ae","last_reissued_at":"2026-05-18T00:40:09.258125Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:40:09.258125Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1707.05173","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:40:09Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"DbimPKe9IfkdGllshK+szXutWjnYEcCuHX2eJmu58kenx+WJaHy2FHzYadEryP/2n56XsfAsCFTKWUXpHlLwDg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-27T22:57:01.749527Z"},"content_sha256":"8b2e5e902e9b747bba0286c590e723938b70280f524deb946933cf60df331970","schema_version":"1.0","event_id":"sha256:8b2e5e902e9b747bba0286c590e723938b70280f524deb946933cf60df331970"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2017:RLLYJDLNTPBTNW3BTRMLKVJJR4","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Trial without Error: Towards Safe Reinforcement Learning via Human Intervention","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.LG","cs.NE"],"primary_cat":"cs.AI","authors_text":"Andreas Stuhlmueller, Girish Sastry, Owain Evans, William Saunders","submitted_at":"2017-07-17T14:13:40Z","abstract_excerpt":"AI systems are increasingly applied to complex tasks that involve interaction with humans. During training, such systems are potentially dangerous, as they haven't yet learned to avoid actions that could cause serious harm. How can an AI system explore and learn without making a single mistake that harms humans or otherwise causes serious damage? For model-free reinforcement learning, having a human \"in the loop\" and ready to intervene is currently the only way to prevent all catastrophes. We formalize human intervention for RL and show how to reduce the human labor required by training a supe"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1707.05173","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:40:09Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"ZX221v9SnTCVhsGKAVLkVUHA3ug50K2N8c4XXouvSfWB5YpJDQRVEGEqV7Jer4X1vkt4Sk72F8uCn6A/EOzPCQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-27T22:57:01.749909Z"},"content_sha256":"76be2ee49599e8888565f753f74cd7fffe915f6e0981cf65d5d5a32ca46147d4","schema_version":"1.0","event_id":"sha256:76be2ee49599e8888565f753f74cd7fffe915f6e0981cf65d5d5a32ca46147d4"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/RLLYJDLNTPBTNW3BTRMLKVJJR4/bundle.json","state_url":"https://pith.science/pith/RLLYJDLNTPBTNW3BTRMLKVJJR4/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/RLLYJDLNTPBTNW3BTRMLKVJJR4/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-27T22:57:01Z","links":{"resolver":"https://pith.science/pith/RLLYJDLNTPBTNW3BTRMLKVJJR4","bundle":"https://pith.science/pith/RLLYJDLNTPBTNW3BTRMLKVJJR4/bundle.json","state":"https://pith.science/pith/RLLYJDLNTPBTNW3BTRMLKVJJR4/state.json","well_known_bundle":"https://pith.science/.well-known/pith/RLLYJDLNTPBTNW3BTRMLKVJJR4/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2017:RLLYJDLNTPBTNW3BTRMLKVJJR4","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"94f7e54319f9409af9f0ce1e5813ed0d966f5265ea3f6c2674b8be593df5520c","cross_cats_sorted":["cs.LG","cs.NE"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2017-07-17T14:13:40Z","title_canon_sha256":"3b611b882b96044f0671e9e778f53f21a3a1fb62f6eef3acc30096251bb78832"},"schema_version":"1.0","source":{"id":"1707.05173","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1707.05173","created_at":"2026-05-18T00:40:09Z"},{"alias_kind":"arxiv_version","alias_value":"1707.05173v1","created_at":"2026-05-18T00:40:09Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1707.05173","created_at":"2026-05-18T00:40:09Z"},{"alias_kind":"pith_short_12","alias_value":"RLLYJDLNTPBT","created_at":"2026-05-18T12:31:39Z"},{"alias_kind":"pith_short_16","alias_value":"RLLYJDLNTPBTNW3B","created_at":"2026-05-18T12:31:39Z"},{"alias_kind":"pith_short_8","alias_value":"RLLYJDLN","created_at":"2026-05-18T12:31:39Z"}],"graph_snapshots":[{"event_id":"sha256:76be2ee49599e8888565f753f74cd7fffe915f6e0981cf65d5d5a32ca46147d4","target":"graph","created_at":"2026-05-18T00:40:09Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"AI systems are increasingly applied to complex tasks that involve interaction with humans. During training, such systems are potentially dangerous, as they haven't yet learned to avoid actions that could cause serious harm. How can an AI system explore and learn without making a single mistake that harms humans or otherwise causes serious damage? For model-free reinforcement learning, having a human \"in the loop\" and ready to intervene is currently the only way to prevent all catastrophes. We formalize human intervention for RL and show how to reduce the human labor required by training a supe","authors_text":"Andreas Stuhlmueller, Girish Sastry, Owain Evans, William Saunders","cross_cats":["cs.LG","cs.NE"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2017-07-17T14:13:40Z","title":"Trial without Error: Towards Safe Reinforcement Learning via Human Intervention"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1707.05173","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:8b2e5e902e9b747bba0286c590e723938b70280f524deb946933cf60df331970","target":"record","created_at":"2026-05-18T00:40:09Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"94f7e54319f9409af9f0ce1e5813ed0d966f5265ea3f6c2674b8be593df5520c","cross_cats_sorted":["cs.LG","cs.NE"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2017-07-17T14:13:40Z","title_canon_sha256":"3b611b882b96044f0671e9e778f53f21a3a1fb62f6eef3acc30096251bb78832"},"schema_version":"1.0","source":{"id":"1707.05173","kind":"arxiv","version":1}},"canonical_sha256":"8ad7848d6d9bc336db619c58b555298f12ef629f48aa9738646f5d5b6f3311ae","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"8ad7848d6d9bc336db619c58b555298f12ef629f48aa9738646f5d5b6f3311ae","first_computed_at":"2026-05-18T00:40:09.258125Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:40:09.258125Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"kTQyPPBiiXqy/TyeZUpgw5y+RMZgejkCdOO5B0bcjfkO1HIrLmaJ6qZnNWFvyez/Cem5oyTJ6QcNZqqSR//lAg==","signature_status":"signed_v1","signed_at":"2026-05-18T00:40:09.258602Z","signed_message":"canonical_sha256_bytes"},"source_id":"1707.05173","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:8b2e5e902e9b747bba0286c590e723938b70280f524deb946933cf60df331970","sha256:76be2ee49599e8888565f753f74cd7fffe915f6e0981cf65d5d5a32ca46147d4"],"state_sha256":"28f295c7567fff02f21b6aa6dcd1ebb094ee95e68d680692f64ec33ecea24776"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"c1kt/F70dErbnc7kfk2LSlSkPIDJ/O5AsvY6rnBRgFTf18uKCp/l6ugFAoiqKZLXAQ6jtsnpq24pRSnpnBNxDQ==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-27T22:57:01.752074Z","bundle_sha256":"cb30986da948ca328a064a1180d056c380533b140766d88377c84f319b21bc71"}}