{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2015:IJDQISY325LQTT3X5ZV32GA4UX","short_pith_number":"pith:IJDQISY3","canonical_record":{"source":{"id":"1506.02632","kind":"arxiv","version":3},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2015-06-08T19:37:55Z","cross_cats_sorted":["math.OC"],"title_canon_sha256":"d70341d27d1f36641fd881af87ab6b492ad7f929ccaee22c05d474c6de8d5925","abstract_canon_sha256":"72813c08897b495b5720cdbd7a045385149853865caccdade931d6af425f013e"},"schema_version":"1.0"},"canonical_sha256":"4247044b1bd75709cf77ee6bbd181ca5cfa594412a288839bd6f4c9fca418e6b","source":{"kind":"arxiv","id":"1506.02632","version":3},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1506.02632","created_at":"2026-05-18T01:19:53Z"},{"alias_kind":"arxiv_version","alias_value":"1506.02632v3","created_at":"2026-05-18T01:19:53Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1506.02632","created_at":"2026-05-18T01:19:53Z"},{"alias_kind":"pith_short_12","alias_value":"IJDQISY325LQ","created_at":"2026-05-18T12:29:25Z"},{"alias_kind":"pith_short_16","alias_value":"IJDQISY325LQTT3X","created_at":"2026-05-18T12:29:25Z"},{"alias_kind":"pith_short_8","alias_value":"IJDQISY3","created_at":"2026-05-18T12:29:25Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2015:IJDQISY325LQTT3X5ZV32GA4UX","target":"record","payload":{"canonical_record":{"source":{"id":"1506.02632","kind":"arxiv","version":3},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2015-06-08T19:37:55Z","cross_cats_sorted":["math.OC"],"title_canon_sha256":"d70341d27d1f36641fd881af87ab6b492ad7f929ccaee22c05d474c6de8d5925","abstract_canon_sha256":"72813c08897b495b5720cdbd7a045385149853865caccdade931d6af425f013e"},"schema_version":"1.0"},"canonical_sha256":"4247044b1bd75709cf77ee6bbd181ca5cfa594412a288839bd6f4c9fca418e6b","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T01:19:53.834842Z","signature_b64":"07p0OkTLY5LVdY69cpj/rusdmoEIQreQBHJiaq9NN9ucXRA4NaUbvN4yzlE6ALJflm8+sc0k0lmVDt1tWkK0Ag==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"4247044b1bd75709cf77ee6bbd181ca5cfa594412a288839bd6f4c9fca418e6b","last_reissued_at":"2026-05-18T01:19:53.834463Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T01:19:53.834463Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1506.02632","source_version":3,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T01:19:53Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"Vx97cptG4FrR7qBINR5IxHX/tnxYxJ8pRuMRE+oERdULxfZzWh1GS6vbU2EprLAosLjnd3TYk076mvsBmuVbCQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-31T03:43:13.780025Z"},"content_sha256":"076877a50d7c1325aeadb8e2294e60fec5e3419f51c129632c7324742a7c4f5c","schema_version":"1.0","event_id":"sha256:076877a50d7c1325aeadb8e2294e60fec5e3419f51c129632c7324742a7c4f5c"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2015:IJDQISY325LQTT3X5ZV32GA4UX","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Cumulative Prospect Theory Meets Reinforcement Learning: Prediction and Control","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["math.OC"],"primary_cat":"cs.LG","authors_text":"Cheng Jie, Csaba Szepesv\\'ari, Michael Fu, Prashanth L.A., Steve Marcus","submitted_at":"2015-06-08T19:37:55Z","abstract_excerpt":"Cumulative prospect theory (CPT) is known to model human decisions well, with substantial empirical evidence supporting this claim. CPT works by distorting probabilities and is more general than the classic expected utility and coherent risk measures. We bring this idea to a risk-sensitive reinforcement learning (RL) setting and design algorithms for both estimation and control. The RL setting presents two particular challenges when CPT is applied: estimating the CPT objective requires estimations of the entire distribution of the value function and finding a randomized optimal policy. The est"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1506.02632","kind":"arxiv","version":3},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T01:19:53Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"nHK3ULM38b8Xn9M63YS8N3jMv4ojFh9HvAPGe6OWMSMxBJQx4qQTF/BX/Nnxk9V/QvWKggtYpVoVkCZrFmwzAQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-31T03:43:13.780526Z"},"content_sha256":"9145e50ef388f2acbfcd5ecfaf56141f36ff21e6e7c482d0ea0c21bb2ac86aff","schema_version":"1.0","event_id":"sha256:9145e50ef388f2acbfcd5ecfaf56141f36ff21e6e7c482d0ea0c21bb2ac86aff"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/IJDQISY325LQTT3X5ZV32GA4UX/bundle.json","state_url":"https://pith.science/pith/IJDQISY325LQTT3X5ZV32GA4UX/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/IJDQISY325LQTT3X5ZV32GA4UX/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-31T03:43:13Z","links":{"resolver":"https://pith.science/pith/IJDQISY325LQTT3X5ZV32GA4UX","bundle":"https://pith.science/pith/IJDQISY325LQTT3X5ZV32GA4UX/bundle.json","state":"https://pith.science/pith/IJDQISY325LQTT3X5ZV32GA4UX/state.json","well_known_bundle":"https://pith.science/.well-known/pith/IJDQISY325LQTT3X5ZV32GA4UX/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2015:IJDQISY325LQTT3X5ZV32GA4UX","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"72813c08897b495b5720cdbd7a045385149853865caccdade931d6af425f013e","cross_cats_sorted":["math.OC"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2015-06-08T19:37:55Z","title_canon_sha256":"d70341d27d1f36641fd881af87ab6b492ad7f929ccaee22c05d474c6de8d5925"},"schema_version":"1.0","source":{"id":"1506.02632","kind":"arxiv","version":3}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1506.02632","created_at":"2026-05-18T01:19:53Z"},{"alias_kind":"arxiv_version","alias_value":"1506.02632v3","created_at":"2026-05-18T01:19:53Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1506.02632","created_at":"2026-05-18T01:19:53Z"},{"alias_kind":"pith_short_12","alias_value":"IJDQISY325LQ","created_at":"2026-05-18T12:29:25Z"},{"alias_kind":"pith_short_16","alias_value":"IJDQISY325LQTT3X","created_at":"2026-05-18T12:29:25Z"},{"alias_kind":"pith_short_8","alias_value":"IJDQISY3","created_at":"2026-05-18T12:29:25Z"}],"graph_snapshots":[{"event_id":"sha256:9145e50ef388f2acbfcd5ecfaf56141f36ff21e6e7c482d0ea0c21bb2ac86aff","target":"graph","created_at":"2026-05-18T01:19:53Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Cumulative prospect theory (CPT) is known to model human decisions well, with substantial empirical evidence supporting this claim. CPT works by distorting probabilities and is more general than the classic expected utility and coherent risk measures. We bring this idea to a risk-sensitive reinforcement learning (RL) setting and design algorithms for both estimation and control. The RL setting presents two particular challenges when CPT is applied: estimating the CPT objective requires estimations of the entire distribution of the value function and finding a randomized optimal policy. The est","authors_text":"Cheng Jie, Csaba Szepesv\\'ari, Michael Fu, Prashanth L.A., Steve Marcus","cross_cats":["math.OC"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2015-06-08T19:37:55Z","title":"Cumulative Prospect Theory Meets Reinforcement Learning: Prediction and Control"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1506.02632","kind":"arxiv","version":3},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:076877a50d7c1325aeadb8e2294e60fec5e3419f51c129632c7324742a7c4f5c","target":"record","created_at":"2026-05-18T01:19:53Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"72813c08897b495b5720cdbd7a045385149853865caccdade931d6af425f013e","cross_cats_sorted":["math.OC"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2015-06-08T19:37:55Z","title_canon_sha256":"d70341d27d1f36641fd881af87ab6b492ad7f929ccaee22c05d474c6de8d5925"},"schema_version":"1.0","source":{"id":"1506.02632","kind":"arxiv","version":3}},"canonical_sha256":"4247044b1bd75709cf77ee6bbd181ca5cfa594412a288839bd6f4c9fca418e6b","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"4247044b1bd75709cf77ee6bbd181ca5cfa594412a288839bd6f4c9fca418e6b","first_computed_at":"2026-05-18T01:19:53.834463Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T01:19:53.834463Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"07p0OkTLY5LVdY69cpj/rusdmoEIQreQBHJiaq9NN9ucXRA4NaUbvN4yzlE6ALJflm8+sc0k0lmVDt1tWkK0Ag==","signature_status":"signed_v1","signed_at":"2026-05-18T01:19:53.834842Z","signed_message":"canonical_sha256_bytes"},"source_id":"1506.02632","source_kind":"arxiv","source_version":3}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:076877a50d7c1325aeadb8e2294e60fec5e3419f51c129632c7324742a7c4f5c","sha256:9145e50ef388f2acbfcd5ecfaf56141f36ff21e6e7c482d0ea0c21bb2ac86aff"],"state_sha256":"d3550b1399db4dd810194eead4591bafac70a1750d7da6b53d653c2e8f1e38fb"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"nHNGOgLSBXHQ9TtYWff7bRNydos/3kNVu8emCpsZFLF53QBaHxpwLGkBhqfNqLI6wyyfzAtQXm6uv9i38s2YCQ==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-31T03:43:13.782740Z","bundle_sha256":"2f9093473e2eef50c1cbb3f70ceca2b313f6380e79ea4f64db5bbb8b43512a89"}}