{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2017:OCERZC75I5Q3MFUEUXSRJDCYHG","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"8e7799b6d033619fb904ad3218732c8be6b3a7d921706ab1351feba81f727d64","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2017-12-15T03:13:23Z","title_canon_sha256":"5bb8d45c58c0b3ef167ea03fb34d2ec35718ede16c936087e122cf2d6e20efaa"},"schema_version":"1.0","source":{"id":"1712.05514","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1712.05514","created_at":"2026-05-18T00:27:56Z"},{"alias_kind":"arxiv_version","alias_value":"1712.05514v1","created_at":"2026-05-18T00:27:56Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1712.05514","created_at":"2026-05-18T00:27:56Z"},{"alias_kind":"pith_short_12","alias_value":"OCERZC75I5Q3","created_at":"2026-05-18T12:31:34Z"},{"alias_kind":"pith_short_16","alias_value":"OCERZC75I5Q3MFUE","created_at":"2026-05-18T12:31:34Z"},{"alias_kind":"pith_short_8","alias_value":"OCERZC75","created_at":"2026-05-18T12:31:34Z"}],"graph_snapshots":[{"event_id":"sha256:92e6fd867c50ac413f88b47e0575344f291c394d991c51ce263bf9aabbdd78cf","target":"graph","created_at":"2026-05-18T00:27:56Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Inverse Reinforcement Learning (IRL) is the task of learning a single reward function given a Markov Decision Process (MDP) without defining the reward function, and a set of demonstrations generated by humans/experts. However, in practice, it may be unreasonable to assume that human behaviors can be explained by one reward function since they may be inherently inconsistent. Also, demonstrations may be collected from various users and aggregated to infer and predict user's behaviors. In this paper, we introduce the Non-parametric Behavior Clustering IRL algorithm to simultaneously cluster demo","authors_text":"Jie Fu, Jinwei Zhang, Siddharthan Rajasekaran","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2017-12-15T03:13:23Z","title":"Inverse Reinforce Learning with Nonparametric Behavior Clustering"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1712.05514","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:3d831f98fdce54906b44a9fe72fd60847d4f5c896781866b63a8d4fc7a87c3f7","target":"record","created_at":"2026-05-18T00:27:56Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"8e7799b6d033619fb904ad3218732c8be6b3a7d921706ab1351feba81f727d64","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2017-12-15T03:13:23Z","title_canon_sha256":"5bb8d45c58c0b3ef167ea03fb34d2ec35718ede16c936087e122cf2d6e20efaa"},"schema_version":"1.0","source":{"id":"1712.05514","kind":"arxiv","version":1}},"canonical_sha256":"70891c8bfd4761b61684a5e5148c583984ad8df18ae96cff4cfb9ee68f9f910c","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"70891c8bfd4761b61684a5e5148c583984ad8df18ae96cff4cfb9ee68f9f910c","first_computed_at":"2026-05-18T00:27:56.258193Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:27:56.258193Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"kfBB00EdygpyGOIJTo3h4beeYZJWPszbJyRDTsUnQUI7NSDnkJBe7xlo1K9b2tEJtvGx9vxzH2fwHnCk3ufBCA==","signature_status":"signed_v1","signed_at":"2026-05-18T00:27:56.258700Z","signed_message":"canonical_sha256_bytes"},"source_id":"1712.05514","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:3d831f98fdce54906b44a9fe72fd60847d4f5c896781866b63a8d4fc7a87c3f7","sha256:92e6fd867c50ac413f88b47e0575344f291c394d991c51ce263bf9aabbdd78cf"],"state_sha256":"fdb7142e8a09d709418d2066088ecfc544e0988c74b544c0dbbd60758f48c360"}