{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2017:ZKDTC2LXWR2UYYV7SLZ3IA3D7T","short_pith_number":"pith:ZKDTC2LX","schema_version":"1.0","canonical_sha256":"ca87316977b4754c62bf92f3b40363fcde02816b747e357c745b28f6ffb5e727","source":{"kind":"arxiv","id":"1710.11248","version":2},"attestation_state":"computed","paper":{"title":"Learning Robust Rewards with Adversarial Inverse Reinforcement Learning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.LG","authors_text":"Justin Fu, Katie Luo, Sergey Levine","submitted_at":"2017-10-30T21:22:28Z","abstract_excerpt":"Reinforcement learning provides a powerful and general framework for decision making and control, but its application in practice is often hindered by the need for extensive feature and reward engineering. Deep reinforcement learning methods can remove the need for explicit engineering of policy or value features, but still require a manually specified reward function. Inverse reinforcement learning holds the promise of automatic reward acquisition, but has proven exceptionally difficult to apply to large, high-dimensional problems with unknown dynamics. In this work, we propose adverserial in"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1710.11248","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2017-10-30T21:22:28Z","cross_cats_sorted":[],"title_canon_sha256":"e783a6b2cb825e97bc398e0bf8eeb2fe819a59c0128b311251b5e8887bae943c","abstract_canon_sha256":"9b02b91f40c3fe9d0f2fd74104bab9120e71a7770d1879f85dc7817e49fa7881"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:08:15.836665Z","signature_b64":"mFbj0/oJM/r/fAlJ7gIrvSVoAcX5xRnKOF7btpnSmew65+fXraZBwqlTsGgsNyHVrUtnPO0uQcOm67Q1igo1AQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"ca87316977b4754c62bf92f3b40363fcde02816b747e357c745b28f6ffb5e727","last_reissued_at":"2026-05-18T00:08:15.836202Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:08:15.836202Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Learning Robust Rewards with Adversarial Inverse Reinforcement Learning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.LG","authors_text":"Justin Fu, Katie Luo, Sergey Levine","submitted_at":"2017-10-30T21:22:28Z","abstract_excerpt":"Reinforcement learning provides a powerful and general framework for decision making and control, but its application in practice is often hindered by the need for extensive feature and reward engineering. Deep reinforcement learning methods can remove the need for explicit engineering of policy or value features, but still require a manually specified reward function. Inverse reinforcement learning holds the promise of automatic reward acquisition, but has proven exceptionally difficult to apply to large, high-dimensional problems with unknown dynamics. In this work, we propose adverserial in"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1710.11248","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1710.11248","created_at":"2026-05-18T00:08:15.836275+00:00"},{"alias_kind":"arxiv_version","alias_value":"1710.11248v2","created_at":"2026-05-18T00:08:15.836275+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1710.11248","created_at":"2026-05-18T00:08:15.836275+00:00"},{"alias_kind":"pith_short_12","alias_value":"ZKDTC2LXWR2U","created_at":"2026-05-18T12:31:59.375834+00:00"},{"alias_kind":"pith_short_16","alias_value":"ZKDTC2LXWR2UYYV7","created_at":"2026-05-18T12:31:59.375834+00:00"},{"alias_kind":"pith_short_8","alias_value":"ZKDTC2LX","created_at":"2026-05-18T12:31:59.375834+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":5,"internal_anchor_count":1,"sample":[{"citing_arxiv_id":"2605.09183","citing_title":"Learning When to Stop: Selective Imitation Learning Under Arbitrary Dynamics Shift","ref_index":57,"is_internal_anchor":true},{"citing_arxiv_id":"2307.05973","citing_title":"VoxPoser: Composable 3D Value Maps for Robotic Manipulation with Language Models","ref_index":100,"is_internal_anchor":false},{"citing_arxiv_id":"2605.08404","citing_title":"Built Environment Reasoning from Remote Sensing Imagery Using Large Vision--Language Models","ref_index":63,"is_internal_anchor":false},{"citing_arxiv_id":"2605.09183","citing_title":"Learning When to Stop: Selective Imitation Learning Under Arbitrary Dynamics Shift","ref_index":57,"is_internal_anchor":false},{"citing_arxiv_id":"2605.00623","citing_title":"Recovering Hidden Reward in Diffusion-Based Policies","ref_index":5,"is_internal_anchor":false}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/ZKDTC2LXWR2UYYV7SLZ3IA3D7T","json":"https://pith.science/pith/ZKDTC2LXWR2UYYV7SLZ3IA3D7T.json","graph_json":"https://pith.science/api/pith-number/ZKDTC2LXWR2UYYV7SLZ3IA3D7T/graph.json","events_json":"https://pith.science/api/pith-number/ZKDTC2LXWR2UYYV7SLZ3IA3D7T/events.json","paper":"https://pith.science/paper/ZKDTC2LX"},"agent_actions":{"view_html":"https://pith.science/pith/ZKDTC2LXWR2UYYV7SLZ3IA3D7T","download_json":"https://pith.science/pith/ZKDTC2LXWR2UYYV7SLZ3IA3D7T.json","view_paper":"https://pith.science/paper/ZKDTC2LX","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1710.11248&json=true","fetch_graph":"https://pith.science/api/pith-number/ZKDTC2LXWR2UYYV7SLZ3IA3D7T/graph.json","fetch_events":"https://pith.science/api/pith-number/ZKDTC2LXWR2UYYV7SLZ3IA3D7T/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/ZKDTC2LXWR2UYYV7SLZ3IA3D7T/action/timestamp_anchor","attest_storage":"https://pith.science/pith/ZKDTC2LXWR2UYYV7SLZ3IA3D7T/action/storage_attestation","attest_author":"https://pith.science/pith/ZKDTC2LXWR2UYYV7SLZ3IA3D7T/action/author_attestation","sign_citation":"https://pith.science/pith/ZKDTC2LXWR2UYYV7SLZ3IA3D7T/action/citation_signature","submit_replication":"https://pith.science/pith/ZKDTC2LXWR2UYYV7SLZ3IA3D7T/action/replication_record"}},"created_at":"2026-05-18T00:08:15.836275+00:00","updated_at":"2026-05-18T00:08:15.836275+00:00"}