{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2016:3DCYUJA2WKTDQRU3NCLS4QQBZA","short_pith_number":"pith:3DCYUJA2","schema_version":"1.0","canonical_sha256":"d8c58a241ab2a638469b68972e4201c837f310a73979865788fad3918ca34ff2","source":{"kind":"arxiv","id":"1611.03852","version":3},"attestation_state":"computed","paper":{"title":"A Connection between Generative Adversarial Networks, Inverse Reinforcement Learning, and Energy-Based Models","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.LG","authors_text":"Chelsea Finn, Paul Christiano, Pieter Abbeel, Sergey Levine","submitted_at":"2016-11-11T20:53:45Z","abstract_excerpt":"Generative adversarial networks (GANs) are a recently proposed class of generative models in which a generator is trained to optimize a cost function that is being simultaneously learned by a discriminator. While the idea of learning cost functions is relatively new to the field of generative modeling, learning costs has long been studied in control and reinforcement learning (RL) domains, typically for imitation learning from demonstrations. In these fields, learning cost function underlying observed behavior is known as inverse reinforcement learning (IRL) or inverse optimal control. While a"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1611.03852","kind":"arxiv","version":3},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2016-11-11T20:53:45Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"3cca2b439fe0c7ddd95f395b667fbc3c7703a0cf9f9930bbaca99cebec0664b6","abstract_canon_sha256":"ac8cb84c19f90a3d2ff6dae607f26fce9d227446646859e55f110c7f5bed7893"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:56:45.101291Z","signature_b64":"LTd6X8pk+XLCfpzehDc5Mlh9kcjalRN0AHH9gUWMBtrpzzn8uzhbUtN8K7X1u8fc+1OBLJDV0Gewi+UHxMlKCQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"d8c58a241ab2a638469b68972e4201c837f310a73979865788fad3918ca34ff2","last_reissued_at":"2026-05-18T00:56:45.100625Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:56:45.100625Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"A Connection between Generative Adversarial Networks, Inverse Reinforcement Learning, and Energy-Based Models","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.LG","authors_text":"Chelsea Finn, Paul Christiano, Pieter Abbeel, Sergey Levine","submitted_at":"2016-11-11T20:53:45Z","abstract_excerpt":"Generative adversarial networks (GANs) are a recently proposed class of generative models in which a generator is trained to optimize a cost function that is being simultaneously learned by a discriminator. While the idea of learning cost functions is relatively new to the field of generative modeling, learning costs has long been studied in control and reinforcement learning (RL) domains, typically for imitation learning from demonstrations. In these fields, learning cost function underlying observed behavior is known as inverse reinforcement learning (IRL) or inverse optimal control. While a"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1611.03852","kind":"arxiv","version":3},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1611.03852","created_at":"2026-05-18T00:56:45.100725+00:00"},{"alias_kind":"arxiv_version","alias_value":"1611.03852v3","created_at":"2026-05-18T00:56:45.100725+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1611.03852","created_at":"2026-05-18T00:56:45.100725+00:00"},{"alias_kind":"pith_short_12","alias_value":"3DCYUJA2WKTD","created_at":"2026-05-18T12:29:55.572404+00:00"},{"alias_kind":"pith_short_16","alias_value":"3DCYUJA2WKTDQRU3","created_at":"2026-05-18T12:29:55.572404+00:00"},{"alias_kind":"pith_short_8","alias_value":"3DCYUJA2","created_at":"2026-05-18T12:29:55.572404+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":5,"internal_anchor_count":2,"sample":[{"citing_arxiv_id":"2605.30843","citing_title":"A Lecture Note on Offline RL and IRL, Part II: Foundations of Inverse Reinforcement Learning and Dynamic Discrete Choice Models","ref_index":3,"is_internal_anchor":true},{"citing_arxiv_id":"1907.06584","citing_title":"Environment Reconstruction with Hidden Confounders for Reinforcement Learning based Recommendation","ref_index":3,"is_internal_anchor":true},{"citing_arxiv_id":"1805.00909","citing_title":"Reinforcement Learning and Control as Probabilistic Inference: Tutorial and Review","ref_index":8,"is_internal_anchor":false},{"citing_arxiv_id":"2605.11020","citing_title":"Trust Region Inverse Reinforcement Learning: Explicit Dual Ascent using Local Policy Updates","ref_index":26,"is_internal_anchor":false},{"citing_arxiv_id":"2605.03660","citing_title":"Stage Light is Sequence$^2$: Multi-Light Control via Imitation Learning","ref_index":13,"is_internal_anchor":false}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/3DCYUJA2WKTDQRU3NCLS4QQBZA","json":"https://pith.science/pith/3DCYUJA2WKTDQRU3NCLS4QQBZA.json","graph_json":"https://pith.science/api/pith-number/3DCYUJA2WKTDQRU3NCLS4QQBZA/graph.json","events_json":"https://pith.science/api/pith-number/3DCYUJA2WKTDQRU3NCLS4QQBZA/events.json","paper":"https://pith.science/paper/3DCYUJA2"},"agent_actions":{"view_html":"https://pith.science/pith/3DCYUJA2WKTDQRU3NCLS4QQBZA","download_json":"https://pith.science/pith/3DCYUJA2WKTDQRU3NCLS4QQBZA.json","view_paper":"https://pith.science/paper/3DCYUJA2","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1611.03852&json=true","fetch_graph":"https://pith.science/api/pith-number/3DCYUJA2WKTDQRU3NCLS4QQBZA/graph.json","fetch_events":"https://pith.science/api/pith-number/3DCYUJA2WKTDQRU3NCLS4QQBZA/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/3DCYUJA2WKTDQRU3NCLS4QQBZA/action/timestamp_anchor","attest_storage":"https://pith.science/pith/3DCYUJA2WKTDQRU3NCLS4QQBZA/action/storage_attestation","attest_author":"https://pith.science/pith/3DCYUJA2WKTDQRU3NCLS4QQBZA/action/author_attestation","sign_citation":"https://pith.science/pith/3DCYUJA2WKTDQRU3NCLS4QQBZA/action/citation_signature","submit_replication":"https://pith.science/pith/3DCYUJA2WKTDQRU3NCLS4QQBZA/action/replication_record"}},"created_at":"2026-05-18T00:56:45.100725+00:00","updated_at":"2026-05-18T00:56:45.100725+00:00"}