{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2017:ZKDTC2LXWR2UYYV7SLZ3IA3D7T","short_pith_number":"pith:ZKDTC2LX","canonical_record":{"source":{"id":"1710.11248","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2017-10-30T21:22:28Z","cross_cats_sorted":[],"title_canon_sha256":"e783a6b2cb825e97bc398e0bf8eeb2fe819a59c0128b311251b5e8887bae943c","abstract_canon_sha256":"9b02b91f40c3fe9d0f2fd74104bab9120e71a7770d1879f85dc7817e49fa7881"},"schema_version":"1.0"},"canonical_sha256":"ca87316977b4754c62bf92f3b40363fcde02816b747e357c745b28f6ffb5e727","source":{"kind":"arxiv","id":"1710.11248","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1710.11248","created_at":"2026-05-18T00:08:15Z"},{"alias_kind":"arxiv_version","alias_value":"1710.11248v2","created_at":"2026-05-18T00:08:15Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1710.11248","created_at":"2026-05-18T00:08:15Z"},{"alias_kind":"pith_short_12","alias_value":"ZKDTC2LXWR2U","created_at":"2026-05-18T12:31:59Z"},{"alias_kind":"pith_short_16","alias_value":"ZKDTC2LXWR2UYYV7","created_at":"2026-05-18T12:31:59Z"},{"alias_kind":"pith_short_8","alias_value":"ZKDTC2LX","created_at":"2026-05-18T12:31:59Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2017:ZKDTC2LXWR2UYYV7SLZ3IA3D7T","target":"record","payload":{"canonical_record":{"source":{"id":"1710.11248","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2017-10-30T21:22:28Z","cross_cats_sorted":[],"title_canon_sha256":"e783a6b2cb825e97bc398e0bf8eeb2fe819a59c0128b311251b5e8887bae943c","abstract_canon_sha256":"9b02b91f40c3fe9d0f2fd74104bab9120e71a7770d1879f85dc7817e49fa7881"},"schema_version":"1.0"},"canonical_sha256":"ca87316977b4754c62bf92f3b40363fcde02816b747e357c745b28f6ffb5e727","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:08:15.836665Z","signature_b64":"mFbj0/oJM/r/fAlJ7gIrvSVoAcX5xRnKOF7btpnSmew65+fXraZBwqlTsGgsNyHVrUtnPO0uQcOm67Q1igo1AQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"ca87316977b4754c62bf92f3b40363fcde02816b747e357c745b28f6ffb5e727","last_reissued_at":"2026-05-18T00:08:15.836202Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:08:15.836202Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1710.11248","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:08:15Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"lzAUqAp9LIsXlEFyVUkkFxrTGtcmjyYEkuwoRVRMy3At6Om7cDC/tcBc4gb9spyAQu/V++ZpzYEEPiYyc0XZBQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-25T21:15:55.585241Z"},"content_sha256":"eed20e2b8b6f2d2f3b7ef42ed4d92da627d14ab9b057a2b8c2fe450f7dfe2825","schema_version":"1.0","event_id":"sha256:eed20e2b8b6f2d2f3b7ef42ed4d92da627d14ab9b057a2b8c2fe450f7dfe2825"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2017:ZKDTC2LXWR2UYYV7SLZ3IA3D7T","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Learning Robust Rewards with Adversarial Inverse Reinforcement Learning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.LG","authors_text":"Justin Fu, Katie Luo, Sergey Levine","submitted_at":"2017-10-30T21:22:28Z","abstract_excerpt":"Reinforcement learning provides a powerful and general framework for decision making and control, but its application in practice is often hindered by the need for extensive feature and reward engineering. Deep reinforcement learning methods can remove the need for explicit engineering of policy or value features, but still require a manually specified reward function. Inverse reinforcement learning holds the promise of automatic reward acquisition, but has proven exceptionally difficult to apply to large, high-dimensional problems with unknown dynamics. In this work, we propose adverserial in"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1710.11248","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:08:15Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"29CBzvfagoeX/Ki31zWoyIlQnU6Ns0KW8SuVlnrjVTjXeMOcB140fueGljt8G8nu+EJI6XcyfJCr5LIhfpSfCg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-25T21:15:55.585967Z"},"content_sha256":"04bd0e911b2f2bd110881715ea0d5c86aa6b1e2821382c1191675c3633d7602b","schema_version":"1.0","event_id":"sha256:04bd0e911b2f2bd110881715ea0d5c86aa6b1e2821382c1191675c3633d7602b"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/ZKDTC2LXWR2UYYV7SLZ3IA3D7T/bundle.json","state_url":"https://pith.science/pith/ZKDTC2LXWR2UYYV7SLZ3IA3D7T/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/ZKDTC2LXWR2UYYV7SLZ3IA3D7T/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-25T21:15:55Z","links":{"resolver":"https://pith.science/pith/ZKDTC2LXWR2UYYV7SLZ3IA3D7T","bundle":"https://pith.science/pith/ZKDTC2LXWR2UYYV7SLZ3IA3D7T/bundle.json","state":"https://pith.science/pith/ZKDTC2LXWR2UYYV7SLZ3IA3D7T/state.json","well_known_bundle":"https://pith.science/.well-known/pith/ZKDTC2LXWR2UYYV7SLZ3IA3D7T/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2017:ZKDTC2LXWR2UYYV7SLZ3IA3D7T","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"9b02b91f40c3fe9d0f2fd74104bab9120e71a7770d1879f85dc7817e49fa7881","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2017-10-30T21:22:28Z","title_canon_sha256":"e783a6b2cb825e97bc398e0bf8eeb2fe819a59c0128b311251b5e8887bae943c"},"schema_version":"1.0","source":{"id":"1710.11248","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1710.11248","created_at":"2026-05-18T00:08:15Z"},{"alias_kind":"arxiv_version","alias_value":"1710.11248v2","created_at":"2026-05-18T00:08:15Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1710.11248","created_at":"2026-05-18T00:08:15Z"},{"alias_kind":"pith_short_12","alias_value":"ZKDTC2LXWR2U","created_at":"2026-05-18T12:31:59Z"},{"alias_kind":"pith_short_16","alias_value":"ZKDTC2LXWR2UYYV7","created_at":"2026-05-18T12:31:59Z"},{"alias_kind":"pith_short_8","alias_value":"ZKDTC2LX","created_at":"2026-05-18T12:31:59Z"}],"graph_snapshots":[{"event_id":"sha256:04bd0e911b2f2bd110881715ea0d5c86aa6b1e2821382c1191675c3633d7602b","target":"graph","created_at":"2026-05-18T00:08:15Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Reinforcement learning provides a powerful and general framework for decision making and control, but its application in practice is often hindered by the need for extensive feature and reward engineering. Deep reinforcement learning methods can remove the need for explicit engineering of policy or value features, but still require a manually specified reward function. Inverse reinforcement learning holds the promise of automatic reward acquisition, but has proven exceptionally difficult to apply to large, high-dimensional problems with unknown dynamics. In this work, we propose adverserial in","authors_text":"Justin Fu, Katie Luo, Sergey Levine","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2017-10-30T21:22:28Z","title":"Learning Robust Rewards with Adversarial Inverse Reinforcement Learning"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1710.11248","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:eed20e2b8b6f2d2f3b7ef42ed4d92da627d14ab9b057a2b8c2fe450f7dfe2825","target":"record","created_at":"2026-05-18T00:08:15Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"9b02b91f40c3fe9d0f2fd74104bab9120e71a7770d1879f85dc7817e49fa7881","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2017-10-30T21:22:28Z","title_canon_sha256":"e783a6b2cb825e97bc398e0bf8eeb2fe819a59c0128b311251b5e8887bae943c"},"schema_version":"1.0","source":{"id":"1710.11248","kind":"arxiv","version":2}},"canonical_sha256":"ca87316977b4754c62bf92f3b40363fcde02816b747e357c745b28f6ffb5e727","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"ca87316977b4754c62bf92f3b40363fcde02816b747e357c745b28f6ffb5e727","first_computed_at":"2026-05-18T00:08:15.836202Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:08:15.836202Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"mFbj0/oJM/r/fAlJ7gIrvSVoAcX5xRnKOF7btpnSmew65+fXraZBwqlTsGgsNyHVrUtnPO0uQcOm67Q1igo1AQ==","signature_status":"signed_v1","signed_at":"2026-05-18T00:08:15.836665Z","signed_message":"canonical_sha256_bytes"},"source_id":"1710.11248","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:eed20e2b8b6f2d2f3b7ef42ed4d92da627d14ab9b057a2b8c2fe450f7dfe2825","sha256:04bd0e911b2f2bd110881715ea0d5c86aa6b1e2821382c1191675c3633d7602b"],"state_sha256":"436d0a17495d677bb810c5659aa72c963670f3b6455277b1afa1ae27b9f28539"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"vMuDC1c00TRx4Q8UuJt0os0TltnUrn9rQzwYB/6NcSbeYWMOsRwGF8w7b484JbA+mZrGqP5oZgUPd61/JJB+AQ==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-25T21:15:55.589719Z","bundle_sha256":"56648f0b5d066427b831c9a0305ac965b749cebf6c898d64e308ad2d3de2ae21"}}