{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2020:K52VUFP5FW6NMBKE4SHDVTBHFE","short_pith_number":"pith:K52VUFP5","schema_version":"1.0","canonical_sha256":"57755a15fd2dbcd60544e48e3acc27293c4dc29bed6c9f85f2c2188c0093fb1e","source":{"kind":"arxiv","id":"2004.09395","version":4},"attestation_state":"computed","paper":{"title":"Energy-Based Imitation Learning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["stat.ML"],"primary_cat":"cs.LG","authors_text":"Minghuan Liu, Minkai Xu, Tairan He, Weinan Zhang","submitted_at":"2020-04-20T15:49:35Z","abstract_excerpt":"We tackle a common scenario in imitation learning (IL), where agents try to recover the optimal policy from expert demonstrations without further access to the expert or environment reward signals. Except the simple Behavior Cloning (BC) that adopts supervised learning followed by the problem of compounding error, previous solutions like inverse reinforcement learning (IRL) and recent generative adversarial methods involve a bi-level or alternating optimization for updating the reward function and the policy, suffering from high computational cost and training instability. Inspired by recent p"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2004.09395","kind":"arxiv","version":4},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2020-04-20T15:49:35Z","cross_cats_sorted":["stat.ML"],"title_canon_sha256":"9dd433d91d91fdf6d4ce9168440101e020ab5e389baf6bf57de53edfbd311f52","abstract_canon_sha256":"41c5e19e539d99fa8e175e63b055268af0b6bf8ea9a220f01da6b839f2990b42"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-07-05T02:32:12.818304Z","signature_b64":"ORf8FTSM4fl/vGz/ggG5XRdCOvgYBZMlWQnX7037LHAS8KDp4hJ87uimg1gKNdk1IwLDO2LJ6ojv4kFOGpMlAw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"57755a15fd2dbcd60544e48e3acc27293c4dc29bed6c9f85f2c2188c0093fb1e","last_reissued_at":"2026-07-05T02:32:12.817802Z","signature_status":"signed_v1","first_computed_at":"2026-07-05T02:32:12.817802Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Energy-Based Imitation Learning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["stat.ML"],"primary_cat":"cs.LG","authors_text":"Minghuan Liu, Minkai Xu, Tairan He, Weinan Zhang","submitted_at":"2020-04-20T15:49:35Z","abstract_excerpt":"We tackle a common scenario in imitation learning (IL), where agents try to recover the optimal policy from expert demonstrations without further access to the expert or environment reward signals. Except the simple Behavior Cloning (BC) that adopts supervised learning followed by the problem of compounding error, previous solutions like inverse reinforcement learning (IRL) and recent generative adversarial methods involve a bi-level or alternating optimization for updating the reward function and the policy, suffering from high computational cost and training instability. Inspired by recent p"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2004.09395","kind":"arxiv","version":4},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2004.09395/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2004.09395","created_at":"2026-07-05T02:32:12.817868+00:00"},{"alias_kind":"arxiv_version","alias_value":"2004.09395v4","created_at":"2026-07-05T02:32:12.817868+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2004.09395","created_at":"2026-07-05T02:32:12.817868+00:00"},{"alias_kind":"pith_short_12","alias_value":"K52VUFP5FW6N","created_at":"2026-07-05T02:32:12.817868+00:00"},{"alias_kind":"pith_short_16","alias_value":"K52VUFP5FW6NMBKE","created_at":"2026-07-05T02:32:12.817868+00:00"},{"alias_kind":"pith_short_8","alias_value":"K52VUFP5","created_at":"2026-07-05T02:32:12.817868+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":1,"internal_anchor_count":0,"sample":[{"citing_arxiv_id":"2311.01378","citing_title":"Vision-Language Foundation Models as Effective Robot Imitators","ref_index":13,"is_internal_anchor":false}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/K52VUFP5FW6NMBKE4SHDVTBHFE","json":"https://pith.science/pith/K52VUFP5FW6NMBKE4SHDVTBHFE.json","graph_json":"https://pith.science/api/pith-number/K52VUFP5FW6NMBKE4SHDVTBHFE/graph.json","events_json":"https://pith.science/api/pith-number/K52VUFP5FW6NMBKE4SHDVTBHFE/events.json","paper":"https://pith.science/paper/K52VUFP5"},"agent_actions":{"view_html":"https://pith.science/pith/K52VUFP5FW6NMBKE4SHDVTBHFE","download_json":"https://pith.science/pith/K52VUFP5FW6NMBKE4SHDVTBHFE.json","view_paper":"https://pith.science/paper/K52VUFP5","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2004.09395&json=true","fetch_graph":"https://pith.science/api/pith-number/K52VUFP5FW6NMBKE4SHDVTBHFE/graph.json","fetch_events":"https://pith.science/api/pith-number/K52VUFP5FW6NMBKE4SHDVTBHFE/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/K52VUFP5FW6NMBKE4SHDVTBHFE/action/timestamp_anchor","attest_storage":"https://pith.science/pith/K52VUFP5FW6NMBKE4SHDVTBHFE/action/storage_attestation","attest_author":"https://pith.science/pith/K52VUFP5FW6NMBKE4SHDVTBHFE/action/author_attestation","sign_citation":"https://pith.science/pith/K52VUFP5FW6NMBKE4SHDVTBHFE/action/citation_signature","submit_replication":"https://pith.science/pith/K52VUFP5FW6NMBKE4SHDVTBHFE/action/replication_record"}},"created_at":"2026-07-05T02:32:12.817868+00:00","updated_at":"2026-07-05T02:32:12.817868+00:00"}