{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2011:OJLYGOZCV7SQBVP7ANWKNVJA4K","short_pith_number":"pith:OJLYGOZC","canonical_record":{"source":{"id":"1104.5687","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"stat.ML","submitted_at":"2011-04-29T17:45:50Z","cross_cats_sorted":["cs.LG"],"title_canon_sha256":"81168f98306a4204a146e7d6463d3b593448c0449012f99733878e7b7701dc9d","abstract_canon_sha256":"115a8b65ec727d5fed03a89ac6bce0f3183bc911f68cb661a1e9f8de9da5d104"},"schema_version":"1.0"},"canonical_sha256":"7257833b22afe500d5ff036ca6d520e2aef86a6440157db6d8ef1d12851b6418","source":{"kind":"arxiv","id":"1104.5687","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1104.5687","created_at":"2026-05-18T04:19:11Z"},{"alias_kind":"arxiv_version","alias_value":"1104.5687v2","created_at":"2026-05-18T04:19:11Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1104.5687","created_at":"2026-05-18T04:19:11Z"},{"alias_kind":"pith_short_12","alias_value":"OJLYGOZCV7SQ","created_at":"2026-05-18T12:26:37Z"},{"alias_kind":"pith_short_16","alias_value":"OJLYGOZCV7SQBVP7","created_at":"2026-05-18T12:26:37Z"},{"alias_kind":"pith_short_8","alias_value":"OJLYGOZC","created_at":"2026-05-18T12:26:37Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2011:OJLYGOZCV7SQBVP7ANWKNVJA4K","target":"record","payload":{"canonical_record":{"source":{"id":"1104.5687","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"stat.ML","submitted_at":"2011-04-29T17:45:50Z","cross_cats_sorted":["cs.LG"],"title_canon_sha256":"81168f98306a4204a146e7d6463d3b593448c0449012f99733878e7b7701dc9d","abstract_canon_sha256":"115a8b65ec727d5fed03a89ac6bce0f3183bc911f68cb661a1e9f8de9da5d104"},"schema_version":"1.0"},"canonical_sha256":"7257833b22afe500d5ff036ca6d520e2aef86a6440157db6d8ef1d12851b6418","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T04:19:11.890773Z","signature_b64":"jbcLfClHgDloCoEVQO6XRDIzS9WSDsHye58vde/eTsM6pzQNIqlWNtTfdf9kScSw0AN0cSdfYyhCYwVEdjeWDw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"7257833b22afe500d5ff036ca6d520e2aef86a6440157db6d8ef1d12851b6418","last_reissued_at":"2026-05-18T04:19:11.890302Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T04:19:11.890302Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1104.5687","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T04:19:11Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"a+abQK0RAoLvU6B0xi7GbFk2TlZdzT+OMEqeEBE62ZYNgBgm6qZhMOQN8n+wArcWp/eGdM0cce1f67CvkvL5CA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-11T11:32:51.991014Z"},"content_sha256":"fcf301b8ff9ba9ee07f5451951b60c49dbfbfe4b56ba2b3915e85f141728fc37","schema_version":"1.0","event_id":"sha256:fcf301b8ff9ba9ee07f5451951b60c49dbfbfe4b56ba2b3915e85f141728fc37"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2011:OJLYGOZCV7SQBVP7ANWKNVJA4K","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Preference elicitation and inverse reinforcement learning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.LG"],"primary_cat":"stat.ML","authors_text":"Christos Dimitrakakis, Constantin Rothkopf","submitted_at":"2011-04-29T17:45:50Z","abstract_excerpt":"We state the problem of inverse reinforcement learning in terms of preference elicitation, resulting in a principled (Bayesian) statistical formulation. This generalises previous work on Bayesian inverse reinforcement learning and allows us to obtain a posterior distribution on the agent's preferences, policy and optionally, the obtained reward sequence, from observations. We examine the relation of the resulting approach to other statistical methods for inverse reinforcement learning via analysis and experimental results. We show that preferences can be determined accurately, even if the obse"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1104.5687","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T04:19:11Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"xNWu9qXsMJDuSwibdxnhsKxu6rADldrSAKyRwnzQ6XxuI0ptd0KtSoQ2vvbTs8rdiStXAuEQMSKRoH7M2nu6Bg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-11T11:32:51.991675Z"},"content_sha256":"9f7b921fe8f972d419bd5e1186ed42b1980d6e213b403d2dd12efc106e947a87","schema_version":"1.0","event_id":"sha256:9f7b921fe8f972d419bd5e1186ed42b1980d6e213b403d2dd12efc106e947a87"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/OJLYGOZCV7SQBVP7ANWKNVJA4K/bundle.json","state_url":"https://pith.science/pith/OJLYGOZCV7SQBVP7ANWKNVJA4K/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/OJLYGOZCV7SQBVP7ANWKNVJA4K/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-11T11:32:51Z","links":{"resolver":"https://pith.science/pith/OJLYGOZCV7SQBVP7ANWKNVJA4K","bundle":"https://pith.science/pith/OJLYGOZCV7SQBVP7ANWKNVJA4K/bundle.json","state":"https://pith.science/pith/OJLYGOZCV7SQBVP7ANWKNVJA4K/state.json","well_known_bundle":"https://pith.science/.well-known/pith/OJLYGOZCV7SQBVP7ANWKNVJA4K/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2011:OJLYGOZCV7SQBVP7ANWKNVJA4K","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"115a8b65ec727d5fed03a89ac6bce0f3183bc911f68cb661a1e9f8de9da5d104","cross_cats_sorted":["cs.LG"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"stat.ML","submitted_at":"2011-04-29T17:45:50Z","title_canon_sha256":"81168f98306a4204a146e7d6463d3b593448c0449012f99733878e7b7701dc9d"},"schema_version":"1.0","source":{"id":"1104.5687","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1104.5687","created_at":"2026-05-18T04:19:11Z"},{"alias_kind":"arxiv_version","alias_value":"1104.5687v2","created_at":"2026-05-18T04:19:11Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1104.5687","created_at":"2026-05-18T04:19:11Z"},{"alias_kind":"pith_short_12","alias_value":"OJLYGOZCV7SQ","created_at":"2026-05-18T12:26:37Z"},{"alias_kind":"pith_short_16","alias_value":"OJLYGOZCV7SQBVP7","created_at":"2026-05-18T12:26:37Z"},{"alias_kind":"pith_short_8","alias_value":"OJLYGOZC","created_at":"2026-05-18T12:26:37Z"}],"graph_snapshots":[{"event_id":"sha256:9f7b921fe8f972d419bd5e1186ed42b1980d6e213b403d2dd12efc106e947a87","target":"graph","created_at":"2026-05-18T04:19:11Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"We state the problem of inverse reinforcement learning in terms of preference elicitation, resulting in a principled (Bayesian) statistical formulation. This generalises previous work on Bayesian inverse reinforcement learning and allows us to obtain a posterior distribution on the agent's preferences, policy and optionally, the obtained reward sequence, from observations. We examine the relation of the resulting approach to other statistical methods for inverse reinforcement learning via analysis and experimental results. We show that preferences can be determined accurately, even if the obse","authors_text":"Christos Dimitrakakis, Constantin Rothkopf","cross_cats":["cs.LG"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"stat.ML","submitted_at":"2011-04-29T17:45:50Z","title":"Preference elicitation and inverse reinforcement learning"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1104.5687","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:fcf301b8ff9ba9ee07f5451951b60c49dbfbfe4b56ba2b3915e85f141728fc37","target":"record","created_at":"2026-05-18T04:19:11Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"115a8b65ec727d5fed03a89ac6bce0f3183bc911f68cb661a1e9f8de9da5d104","cross_cats_sorted":["cs.LG"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"stat.ML","submitted_at":"2011-04-29T17:45:50Z","title_canon_sha256":"81168f98306a4204a146e7d6463d3b593448c0449012f99733878e7b7701dc9d"},"schema_version":"1.0","source":{"id":"1104.5687","kind":"arxiv","version":2}},"canonical_sha256":"7257833b22afe500d5ff036ca6d520e2aef86a6440157db6d8ef1d12851b6418","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"7257833b22afe500d5ff036ca6d520e2aef86a6440157db6d8ef1d12851b6418","first_computed_at":"2026-05-18T04:19:11.890302Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T04:19:11.890302Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"jbcLfClHgDloCoEVQO6XRDIzS9WSDsHye58vde/eTsM6pzQNIqlWNtTfdf9kScSw0AN0cSdfYyhCYwVEdjeWDw==","signature_status":"signed_v1","signed_at":"2026-05-18T04:19:11.890773Z","signed_message":"canonical_sha256_bytes"},"source_id":"1104.5687","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:fcf301b8ff9ba9ee07f5451951b60c49dbfbfe4b56ba2b3915e85f141728fc37","sha256:9f7b921fe8f972d419bd5e1186ed42b1980d6e213b403d2dd12efc106e947a87"],"state_sha256":"80051190cb618887e5275419e55fe97acb1bb5c60f8040cecfc58bc46c0a090f"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"Pq2SsyqvQLOtSRlEfI/RoVSGyi2tYWR7WJPiAbpHv7Afq9g81yU0YNIUv+awwBllCd4ZspRaCqqy3wt/kTfUAA==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-11T11:32:51.995584Z","bundle_sha256":"d554ac8d523932b1096a72e9cc770751ccd2b3fa4a5d8f4a4660dc346d2e5344"}}