{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2018:O3W533RDW2P4XM4DB4SSWWXTCU","short_pith_number":"pith:O3W533RD","canonical_record":{"source":{"id":"1811.05590","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-11-14T01:30:00Z","cross_cats_sorted":["cs.AI","stat.ML"],"title_canon_sha256":"1f6c10c7b50192f089b4f0992b00db2b59c468e24867e734a66daf0510675dfa","abstract_canon_sha256":"11c6cd4e044ecd01293ee3ba82ea5697abcc4cd037bdfb20a09266520dd1ee5e"},"schema_version":"1.0"},"canonical_sha256":"76edddee23b69fcbb3830f252b5af315220397ba9a266d9cd6487aa9150d986d","source":{"kind":"arxiv","id":"1811.05590","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1811.05590","created_at":"2026-05-18T00:00:42Z"},{"alias_kind":"arxiv_version","alias_value":"1811.05590v1","created_at":"2026-05-18T00:00:42Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1811.05590","created_at":"2026-05-18T00:00:42Z"},{"alias_kind":"pith_short_12","alias_value":"O3W533RDW2P4","created_at":"2026-05-18T12:32:40Z"},{"alias_kind":"pith_short_16","alias_value":"O3W533RDW2P4XM4D","created_at":"2026-05-18T12:32:40Z"},{"alias_kind":"pith_short_8","alias_value":"O3W533RD","created_at":"2026-05-18T12:32:40Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2018:O3W533RDW2P4XM4DB4SSWWXTCU","target":"record","payload":{"canonical_record":{"source":{"id":"1811.05590","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-11-14T01:30:00Z","cross_cats_sorted":["cs.AI","stat.ML"],"title_canon_sha256":"1f6c10c7b50192f089b4f0992b00db2b59c468e24867e734a66daf0510675dfa","abstract_canon_sha256":"11c6cd4e044ecd01293ee3ba82ea5697abcc4cd037bdfb20a09266520dd1ee5e"},"schema_version":"1.0"},"canonical_sha256":"76edddee23b69fcbb3830f252b5af315220397ba9a266d9cd6487aa9150d986d","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:00:42.571354Z","signature_b64":"i1QLlkcG3JFvdELoFgwc3tJ0RSQ7vhbAmWTnk7nrDe2U5KqF2Oy1tITkavXJ5mTDjNXF1zKcT5b2vf1sHR0ZCw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"76edddee23b69fcbb3830f252b5af315220397ba9a266d9cd6487aa9150d986d","last_reissued_at":"2026-05-18T00:00:42.570882Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:00:42.570882Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1811.05590","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:00:42Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"tLBYzcK6WmF5EfN1g0xyyNnTz1HszCalO0ZV1pT+1gbbRyIHqOjMKb3oY48+B5U8kMGFBcFcxLe4iMatIag4CA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-26T16:04:03.451088Z"},"content_sha256":"e1b501b15d5f8c15117d0304c3bcb4db82c64dfbd0954feebca3809b76eecfd2","schema_version":"1.0","event_id":"sha256:e1b501b15d5f8c15117d0304c3bcb4db82c64dfbd0954feebca3809b76eecfd2"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2018:O3W533RDW2P4XM4DB4SSWWXTCU","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Emergence of Addictive Behaviors in Reinforcement Learning Agents","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","stat.ML"],"primary_cat":"cs.LG","authors_text":"Arslan Munir, Roman V. Yampolskiy, Vahid Behzadan","submitted_at":"2018-11-14T01:30:00Z","abstract_excerpt":"This paper presents a novel approach to the technical analysis of wireheading in intelligent agents. Inspired by the natural analogues of wireheading and their prevalent manifestations, we propose the modeling of such phenomenon in Reinforcement Learning (RL) agents as psychological disorders. In a preliminary step towards evaluating this proposal, we study the feasibility and dynamics of emergent addictive policies in Q-learning agents in the tractable environment of the game of Snake. We consider a slightly modified settings for this game, in which the environment provides a \"drug\" seed alon"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1811.05590","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:00:42Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"k5XsDsn/BYN+oVIfgT9gAIs4ow0/vbSgxsEhMXnJLx1kG37hHyv1HM9l8FhLDY5k85sKEMcZn0650PuHvDG7BA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-26T16:04:03.451744Z"},"content_sha256":"6253eca8a272f3905afeb94c36d76e2f935b80acc5799d8fb2e50f4e0d31da7a","schema_version":"1.0","event_id":"sha256:6253eca8a272f3905afeb94c36d76e2f935b80acc5799d8fb2e50f4e0d31da7a"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/O3W533RDW2P4XM4DB4SSWWXTCU/bundle.json","state_url":"https://pith.science/pith/O3W533RDW2P4XM4DB4SSWWXTCU/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/O3W533RDW2P4XM4DB4SSWWXTCU/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-26T16:04:03Z","links":{"resolver":"https://pith.science/pith/O3W533RDW2P4XM4DB4SSWWXTCU","bundle":"https://pith.science/pith/O3W533RDW2P4XM4DB4SSWWXTCU/bundle.json","state":"https://pith.science/pith/O3W533RDW2P4XM4DB4SSWWXTCU/state.json","well_known_bundle":"https://pith.science/.well-known/pith/O3W533RDW2P4XM4DB4SSWWXTCU/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2018:O3W533RDW2P4XM4DB4SSWWXTCU","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"11c6cd4e044ecd01293ee3ba82ea5697abcc4cd037bdfb20a09266520dd1ee5e","cross_cats_sorted":["cs.AI","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-11-14T01:30:00Z","title_canon_sha256":"1f6c10c7b50192f089b4f0992b00db2b59c468e24867e734a66daf0510675dfa"},"schema_version":"1.0","source":{"id":"1811.05590","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1811.05590","created_at":"2026-05-18T00:00:42Z"},{"alias_kind":"arxiv_version","alias_value":"1811.05590v1","created_at":"2026-05-18T00:00:42Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1811.05590","created_at":"2026-05-18T00:00:42Z"},{"alias_kind":"pith_short_12","alias_value":"O3W533RDW2P4","created_at":"2026-05-18T12:32:40Z"},{"alias_kind":"pith_short_16","alias_value":"O3W533RDW2P4XM4D","created_at":"2026-05-18T12:32:40Z"},{"alias_kind":"pith_short_8","alias_value":"O3W533RD","created_at":"2026-05-18T12:32:40Z"}],"graph_snapshots":[{"event_id":"sha256:6253eca8a272f3905afeb94c36d76e2f935b80acc5799d8fb2e50f4e0d31da7a","target":"graph","created_at":"2026-05-18T00:00:42Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"This paper presents a novel approach to the technical analysis of wireheading in intelligent agents. Inspired by the natural analogues of wireheading and their prevalent manifestations, we propose the modeling of such phenomenon in Reinforcement Learning (RL) agents as psychological disorders. In a preliminary step towards evaluating this proposal, we study the feasibility and dynamics of emergent addictive policies in Q-learning agents in the tractable environment of the game of Snake. We consider a slightly modified settings for this game, in which the environment provides a \"drug\" seed alon","authors_text":"Arslan Munir, Roman V. Yampolskiy, Vahid Behzadan","cross_cats":["cs.AI","stat.ML"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-11-14T01:30:00Z","title":"Emergence of Addictive Behaviors in Reinforcement Learning Agents"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1811.05590","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:e1b501b15d5f8c15117d0304c3bcb4db82c64dfbd0954feebca3809b76eecfd2","target":"record","created_at":"2026-05-18T00:00:42Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"11c6cd4e044ecd01293ee3ba82ea5697abcc4cd037bdfb20a09266520dd1ee5e","cross_cats_sorted":["cs.AI","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-11-14T01:30:00Z","title_canon_sha256":"1f6c10c7b50192f089b4f0992b00db2b59c468e24867e734a66daf0510675dfa"},"schema_version":"1.0","source":{"id":"1811.05590","kind":"arxiv","version":1}},"canonical_sha256":"76edddee23b69fcbb3830f252b5af315220397ba9a266d9cd6487aa9150d986d","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"76edddee23b69fcbb3830f252b5af315220397ba9a266d9cd6487aa9150d986d","first_computed_at":"2026-05-18T00:00:42.570882Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:00:42.570882Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"i1QLlkcG3JFvdELoFgwc3tJ0RSQ7vhbAmWTnk7nrDe2U5KqF2Oy1tITkavXJ5mTDjNXF1zKcT5b2vf1sHR0ZCw==","signature_status":"signed_v1","signed_at":"2026-05-18T00:00:42.571354Z","signed_message":"canonical_sha256_bytes"},"source_id":"1811.05590","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:e1b501b15d5f8c15117d0304c3bcb4db82c64dfbd0954feebca3809b76eecfd2","sha256:6253eca8a272f3905afeb94c36d76e2f935b80acc5799d8fb2e50f4e0d31da7a"],"state_sha256":"02c91b1bfd0d9dcd87fab49d6711743fd3f788b6e5d7112b615611d1412e87e7"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"409B00rc4qQEM5oA53cIF9lAABJb7wo9UjHDjMvd34SbXCp/+wZDX/6UHKCJ0pFaaSvciv9sCDwwmNB+ILW1AA==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-26T16:04:03.454771Z","bundle_sha256":"610b8198c61253c923901a7afb59ca5937479a7e1b1aaaf2be62f86ec0be2273"}}