{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2013:NZIJGQ2AHZEEXNATL7QG4VMY62","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"cc90c7b1fff448d0cf3cb0e766983f6e94b6c17fe6e415238820b1b896fd692e","cross_cats_sorted":["cs.LG"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"stat.ML","submitted_at":"2013-07-14T22:06:12Z","title_canon_sha256":"a1903e7ec1858d23cc301e25ef4431e4200cade1835887e9e19e4b942d4052df"},"schema_version":"1.0","source":{"id":"1307.3785","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1307.3785","created_at":"2026-05-18T03:18:27Z"},{"alias_kind":"arxiv_version","alias_value":"1307.3785v1","created_at":"2026-05-18T03:18:27Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1307.3785","created_at":"2026-05-18T03:18:27Z"},{"alias_kind":"pith_short_12","alias_value":"NZIJGQ2AHZEE","created_at":"2026-05-18T12:27:54Z"},{"alias_kind":"pith_short_16","alias_value":"NZIJGQ2AHZEEXNAT","created_at":"2026-05-18T12:27:54Z"},{"alias_kind":"pith_short_8","alias_value":"NZIJGQ2A","created_at":"2026-05-18T12:27:54Z"}],"graph_snapshots":[{"event_id":"sha256:dc2a02c316b34b24ccb61627e294a37551cefdd44bd68cfd3cf752c256228e39","target":"graph","created_at":"2026-05-18T03:18:27Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"We consider the problem of learning by demonstration from agents acting in unknown stochastic Markov environments or games. Our aim is to estimate agent preferences in order to construct improved policies for the same task that the agents are trying to solve. To do so, we extend previous probabilistic approaches for inverse reinforcement learning in known MDPs to the case of unknown dynamics or opponents. We do this by deriving two simplified probabilistic models of the demonstrator's policy and utility. For tractability, we use maximum a posteriori estimation rather than full Bayesian inferen","authors_text":"Aristide C. Y. Tossou, Christos Dimitrakakis","cross_cats":["cs.LG"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"stat.ML","submitted_at":"2013-07-14T22:06:12Z","title":"Probabilistic inverse reinforcement learning in unknown environments"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1307.3785","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:0b8ca93d2bf04c7e44024b453c3b777bbbc560a57cf31b6eb22c8d13c7d4c3e6","target":"record","created_at":"2026-05-18T03:18:27Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"cc90c7b1fff448d0cf3cb0e766983f6e94b6c17fe6e415238820b1b896fd692e","cross_cats_sorted":["cs.LG"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"stat.ML","submitted_at":"2013-07-14T22:06:12Z","title_canon_sha256":"a1903e7ec1858d23cc301e25ef4431e4200cade1835887e9e19e4b942d4052df"},"schema_version":"1.0","source":{"id":"1307.3785","kind":"arxiv","version":1}},"canonical_sha256":"6e509343403e484bb4135fe06e5598f6beef6d7d4d41d13685e996f83c66b283","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"6e509343403e484bb4135fe06e5598f6beef6d7d4d41d13685e996f83c66b283","first_computed_at":"2026-05-18T03:18:27.891436Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T03:18:27.891436Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"itVsVwV7az6sxZkAK+vJnw7m6TZhOJTD3jzyxE/xVzb4BLC+SWuE15nXVFrk2wzM05AVsYudhpx7jsZDGlOaBQ==","signature_status":"signed_v1","signed_at":"2026-05-18T03:18:27.891874Z","signed_message":"canonical_sha256_bytes"},"source_id":"1307.3785","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:0b8ca93d2bf04c7e44024b453c3b777bbbc560a57cf31b6eb22c8d13c7d4c3e6","sha256:dc2a02c316b34b24ccb61627e294a37551cefdd44bd68cfd3cf752c256228e39"],"state_sha256":"9b8b88dd4f21f9b9dc59487e924f07e66d2b52e454502285e9e6bd5d3eac2086"}