{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2017:ZKDTC2LXWR2UYYV7SLZ3IA3D7T","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"9b02b91f40c3fe9d0f2fd74104bab9120e71a7770d1879f85dc7817e49fa7881","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2017-10-30T21:22:28Z","title_canon_sha256":"e783a6b2cb825e97bc398e0bf8eeb2fe819a59c0128b311251b5e8887bae943c"},"schema_version":"1.0","source":{"id":"1710.11248","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1710.11248","created_at":"2026-05-18T00:08:15Z"},{"alias_kind":"arxiv_version","alias_value":"1710.11248v2","created_at":"2026-05-18T00:08:15Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1710.11248","created_at":"2026-05-18T00:08:15Z"},{"alias_kind":"pith_short_12","alias_value":"ZKDTC2LXWR2U","created_at":"2026-05-18T12:31:59Z"},{"alias_kind":"pith_short_16","alias_value":"ZKDTC2LXWR2UYYV7","created_at":"2026-05-18T12:31:59Z"},{"alias_kind":"pith_short_8","alias_value":"ZKDTC2LX","created_at":"2026-05-18T12:31:59Z"}],"graph_snapshots":[{"event_id":"sha256:04bd0e911b2f2bd110881715ea0d5c86aa6b1e2821382c1191675c3633d7602b","target":"graph","created_at":"2026-05-18T00:08:15Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Reinforcement learning provides a powerful and general framework for decision making and control, but its application in practice is often hindered by the need for extensive feature and reward engineering. Deep reinforcement learning methods can remove the need for explicit engineering of policy or value features, but still require a manually specified reward function. Inverse reinforcement learning holds the promise of automatic reward acquisition, but has proven exceptionally difficult to apply to large, high-dimensional problems with unknown dynamics. In this work, we propose adverserial in","authors_text":"Justin Fu, Katie Luo, Sergey Levine","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2017-10-30T21:22:28Z","title":"Learning Robust Rewards with Adversarial Inverse Reinforcement Learning"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1710.11248","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:eed20e2b8b6f2d2f3b7ef42ed4d92da627d14ab9b057a2b8c2fe450f7dfe2825","target":"record","created_at":"2026-05-18T00:08:15Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"9b02b91f40c3fe9d0f2fd74104bab9120e71a7770d1879f85dc7817e49fa7881","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2017-10-30T21:22:28Z","title_canon_sha256":"e783a6b2cb825e97bc398e0bf8eeb2fe819a59c0128b311251b5e8887bae943c"},"schema_version":"1.0","source":{"id":"1710.11248","kind":"arxiv","version":2}},"canonical_sha256":"ca87316977b4754c62bf92f3b40363fcde02816b747e357c745b28f6ffb5e727","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"ca87316977b4754c62bf92f3b40363fcde02816b747e357c745b28f6ffb5e727","first_computed_at":"2026-05-18T00:08:15.836202Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:08:15.836202Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"mFbj0/oJM/r/fAlJ7gIrvSVoAcX5xRnKOF7btpnSmew65+fXraZBwqlTsGgsNyHVrUtnPO0uQcOm67Q1igo1AQ==","signature_status":"signed_v1","signed_at":"2026-05-18T00:08:15.836665Z","signed_message":"canonical_sha256_bytes"},"source_id":"1710.11248","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:eed20e2b8b6f2d2f3b7ef42ed4d92da627d14ab9b057a2b8c2fe450f7dfe2825","sha256:04bd0e911b2f2bd110881715ea0d5c86aa6b1e2821382c1191675c3633d7602b"],"state_sha256":"436d0a17495d677bb810c5659aa72c963670f3b6455277b1afa1ae27b9f28539"}