{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2017:OMUT5TSCKLE4XJGJEN4OVOD3KG","short_pith_number":"pith:OMUT5TSC","canonical_record":{"source":{"id":"1712.04172","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2017-12-12T08:35:52Z","cross_cats_sorted":[],"title_canon_sha256":"98d68cb46680e436e50ad0dd40eb3a2dfd51921b83563f8141d6ab92e41da60b","abstract_canon_sha256":"d45e60579a0fc034316e972f42d0dd38b03ec916094866c82645dadaa4bacf8e"},"schema_version":"1.0"},"canonical_sha256":"73293ece4252c9cba4c92378eab87b51859659f20fc5c0abe0f726ac1b963530","source":{"kind":"arxiv","id":"1712.04172","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1712.04172","created_at":"2026-05-18T00:06:12Z"},{"alias_kind":"arxiv_version","alias_value":"1712.04172v2","created_at":"2026-05-18T00:06:12Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1712.04172","created_at":"2026-05-18T00:06:12Z"},{"alias_kind":"pith_short_12","alias_value":"OMUT5TSCKLE4","created_at":"2026-05-18T12:31:34Z"},{"alias_kind":"pith_short_16","alias_value":"OMUT5TSCKLE4XJGJ","created_at":"2026-05-18T12:31:34Z"},{"alias_kind":"pith_short_8","alias_value":"OMUT5TSC","created_at":"2026-05-18T12:31:34Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2017:OMUT5TSCKLE4XJGJEN4OVOD3KG","target":"record","payload":{"canonical_record":{"source":{"id":"1712.04172","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2017-12-12T08:35:52Z","cross_cats_sorted":[],"title_canon_sha256":"98d68cb46680e436e50ad0dd40eb3a2dfd51921b83563f8141d6ab92e41da60b","abstract_canon_sha256":"d45e60579a0fc034316e972f42d0dd38b03ec916094866c82645dadaa4bacf8e"},"schema_version":"1.0"},"canonical_sha256":"73293ece4252c9cba4c92378eab87b51859659f20fc5c0abe0f726ac1b963530","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:06:12.004669Z","signature_b64":"wGlASDT/OQGVqzt4F+uYHCjZojm4+H3M06lzGVZCoS3ypE/d97oEfFJk7m2Cm2WPr2RwCnLwPsaVTpPR1FXiCg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"73293ece4252c9cba4c92378eab87b51859659f20fc5c0abe0f726ac1b963530","last_reissued_at":"2026-05-18T00:06:12.003990Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:06:12.003990Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1712.04172","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:06:12Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"ThXSZggez5hLh2CW+gLRPIVbcJdtORi2wWjMj1653f246/UQ3SU9McycbXI3N4RBHbMIn1gUBnF1QjAKFtSeCg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-04T06:20:08.575460Z"},"content_sha256":"408cbf106c3c51db6fc71a5c90e6a50df0ba13524a5a3c4718ef2891085899d0","schema_version":"1.0","event_id":"sha256:408cbf106c3c51db6fc71a5c90e6a50df0ba13524a5a3c4718ef2891085899d0"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2017:OMUT5TSCKLE4XJGJEN4OVOD3KG","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"A Low-Cost Ethics Shaping Approach for Designing Reinforcement Learning Agents","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.AI","authors_text":"Shou-De Lin, Yueh-Hua Wu","submitted_at":"2017-12-12T08:35:52Z","abstract_excerpt":"This paper proposes a low-cost, easily realizable strategy to equip a reinforcement learning (RL) agent the capability of behaving ethically. Our model allows the designers of RL agents to solely focus on the task to achieve, without having to worry about the implementation of multiple trivial ethical patterns to follow. Based on the assumption that the majority of human behavior, regardless which goals they are achieving, is ethical, our design integrates human policy with the RL policy to achieve the target objective with less chance of violating the ethical code that human beings normally o"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1712.04172","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:06:12Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"CTDk8reyJ3V9gWsq4drNDjdqAFJxV+v7bC67HJNLcrfye8Xy1s7Y2CwRhlJXCahcwG/5o7P+spHqAedrEruiBw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-04T06:20:08.575817Z"},"content_sha256":"6fa10f573f09e61e8114d0311140d494eb7a881444f616870d5522c27034e319","schema_version":"1.0","event_id":"sha256:6fa10f573f09e61e8114d0311140d494eb7a881444f616870d5522c27034e319"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/OMUT5TSCKLE4XJGJEN4OVOD3KG/bundle.json","state_url":"https://pith.science/pith/OMUT5TSCKLE4XJGJEN4OVOD3KG/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/OMUT5TSCKLE4XJGJEN4OVOD3KG/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-04T06:20:08Z","links":{"resolver":"https://pith.science/pith/OMUT5TSCKLE4XJGJEN4OVOD3KG","bundle":"https://pith.science/pith/OMUT5TSCKLE4XJGJEN4OVOD3KG/bundle.json","state":"https://pith.science/pith/OMUT5TSCKLE4XJGJEN4OVOD3KG/state.json","well_known_bundle":"https://pith.science/.well-known/pith/OMUT5TSCKLE4XJGJEN4OVOD3KG/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2017:OMUT5TSCKLE4XJGJEN4OVOD3KG","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"d45e60579a0fc034316e972f42d0dd38b03ec916094866c82645dadaa4bacf8e","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2017-12-12T08:35:52Z","title_canon_sha256":"98d68cb46680e436e50ad0dd40eb3a2dfd51921b83563f8141d6ab92e41da60b"},"schema_version":"1.0","source":{"id":"1712.04172","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1712.04172","created_at":"2026-05-18T00:06:12Z"},{"alias_kind":"arxiv_version","alias_value":"1712.04172v2","created_at":"2026-05-18T00:06:12Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1712.04172","created_at":"2026-05-18T00:06:12Z"},{"alias_kind":"pith_short_12","alias_value":"OMUT5TSCKLE4","created_at":"2026-05-18T12:31:34Z"},{"alias_kind":"pith_short_16","alias_value":"OMUT5TSCKLE4XJGJ","created_at":"2026-05-18T12:31:34Z"},{"alias_kind":"pith_short_8","alias_value":"OMUT5TSC","created_at":"2026-05-18T12:31:34Z"}],"graph_snapshots":[{"event_id":"sha256:6fa10f573f09e61e8114d0311140d494eb7a881444f616870d5522c27034e319","target":"graph","created_at":"2026-05-18T00:06:12Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"This paper proposes a low-cost, easily realizable strategy to equip a reinforcement learning (RL) agent the capability of behaving ethically. Our model allows the designers of RL agents to solely focus on the task to achieve, without having to worry about the implementation of multiple trivial ethical patterns to follow. Based on the assumption that the majority of human behavior, regardless which goals they are achieving, is ethical, our design integrates human policy with the RL policy to achieve the target objective with less chance of violating the ethical code that human beings normally o","authors_text":"Shou-De Lin, Yueh-Hua Wu","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2017-12-12T08:35:52Z","title":"A Low-Cost Ethics Shaping Approach for Designing Reinforcement Learning Agents"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1712.04172","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:408cbf106c3c51db6fc71a5c90e6a50df0ba13524a5a3c4718ef2891085899d0","target":"record","created_at":"2026-05-18T00:06:12Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"d45e60579a0fc034316e972f42d0dd38b03ec916094866c82645dadaa4bacf8e","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2017-12-12T08:35:52Z","title_canon_sha256":"98d68cb46680e436e50ad0dd40eb3a2dfd51921b83563f8141d6ab92e41da60b"},"schema_version":"1.0","source":{"id":"1712.04172","kind":"arxiv","version":2}},"canonical_sha256":"73293ece4252c9cba4c92378eab87b51859659f20fc5c0abe0f726ac1b963530","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"73293ece4252c9cba4c92378eab87b51859659f20fc5c0abe0f726ac1b963530","first_computed_at":"2026-05-18T00:06:12.003990Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:06:12.003990Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"wGlASDT/OQGVqzt4F+uYHCjZojm4+H3M06lzGVZCoS3ypE/d97oEfFJk7m2Cm2WPr2RwCnLwPsaVTpPR1FXiCg==","signature_status":"signed_v1","signed_at":"2026-05-18T00:06:12.004669Z","signed_message":"canonical_sha256_bytes"},"source_id":"1712.04172","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:408cbf106c3c51db6fc71a5c90e6a50df0ba13524a5a3c4718ef2891085899d0","sha256:6fa10f573f09e61e8114d0311140d494eb7a881444f616870d5522c27034e319"],"state_sha256":"6dfc0d59882464f013857d7cc6916dee9d4c6e12e05d8c768528778c00d6a031"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"E7qOzxV8CqlAgD4+wmJdwNCS+t7VAsVlt22II43xdeql9CrGPk/LJD5jdGw9KILuRmveHKG/s/5a+5hQ+r5bBg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-04T06:20:08.577886Z","bundle_sha256":"4c3682c1266ab93770dcdcd00c14b926c1b870943eb550a23147e25e12055b4f"}}