{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2018:42ELHNGZUMMGK5CUU6FE2EU6LH","short_pith_number":"pith:42ELHNGZ","canonical_record":{"source":{"id":"1809.02925","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-09-09T05:37:25Z","cross_cats_sorted":["stat.ML"],"title_canon_sha256":"af8dc7fc6113484b56a0a5bb582018f086b0ddb13b43bc56060a8df7b0dbece7","abstract_canon_sha256":"dce6ee98ccf169cef064a1760a18d643dcba2abd994c9db1b80bfc7ba82621e6"},"schema_version":"1.0"},"canonical_sha256":"e688b3b4d9a318657454a78a4d129e59c4ce279df0ede04b6fa50b911d864970","source":{"kind":"arxiv","id":"1809.02925","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1809.02925","created_at":"2026-05-18T00:03:24Z"},{"alias_kind":"arxiv_version","alias_value":"1809.02925v2","created_at":"2026-05-18T00:03:24Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1809.02925","created_at":"2026-05-18T00:03:24Z"},{"alias_kind":"pith_short_12","alias_value":"42ELHNGZUMMG","created_at":"2026-05-18T12:32:05Z"},{"alias_kind":"pith_short_16","alias_value":"42ELHNGZUMMGK5CU","created_at":"2026-05-18T12:32:05Z"},{"alias_kind":"pith_short_8","alias_value":"42ELHNGZ","created_at":"2026-05-18T12:32:05Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2018:42ELHNGZUMMGK5CUU6FE2EU6LH","target":"record","payload":{"canonical_record":{"source":{"id":"1809.02925","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-09-09T05:37:25Z","cross_cats_sorted":["stat.ML"],"title_canon_sha256":"af8dc7fc6113484b56a0a5bb582018f086b0ddb13b43bc56060a8df7b0dbece7","abstract_canon_sha256":"dce6ee98ccf169cef064a1760a18d643dcba2abd994c9db1b80bfc7ba82621e6"},"schema_version":"1.0"},"canonical_sha256":"e688b3b4d9a318657454a78a4d129e59c4ce279df0ede04b6fa50b911d864970","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:03:24.133333Z","signature_b64":"Tr/RO55qSy285qx8GXhWFN7RxHX3YBZzi4uGApTbKa+J/ffEUdMiWDbt+cglmIDMIc1sVzp5ZgsKJ3IqDmVuAw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"e688b3b4d9a318657454a78a4d129e59c4ce279df0ede04b6fa50b911d864970","last_reissued_at":"2026-05-18T00:03:24.132675Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:03:24.132675Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1809.02925","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:03:24Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"Jx/8AeGMUL7UVc2TM8ZyhygXgMkzSg+phb4v79qNUlf6LiBtuHYHdB1aUigbhnzSyWrX472BzHfX0l1OQ7UTCw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-27T10:20:27.324812Z"},"content_sha256":"bd1fab67950d01dd4a91f3e19b1a269072ce2e8873e4017437c6bf0c154ae2d8","schema_version":"1.0","event_id":"sha256:bd1fab67950d01dd4a91f3e19b1a269072ce2e8873e4017437c6bf0c154ae2d8"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2018:42ELHNGZUMMGK5CUU6FE2EU6LH","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Discriminator-Actor-Critic: Addressing Sample Inefficiency and Reward Bias in Adversarial Imitation Learning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["stat.ML"],"primary_cat":"cs.LG","authors_text":"Debidatta Dwibedi, Ilya Kostrikov, Jonathan Tompson, Kumar Krishna Agrawal, Sergey Levine","submitted_at":"2018-09-09T05:37:25Z","abstract_excerpt":"We identify two issues with the family of algorithms based on the Adversarial Imitation Learning framework. The first problem is implicit bias present in the reward functions used in these algorithms. While these biases might work well for some environments, they can also lead to sub-optimal behavior in others. Secondly, even though these algorithms can learn from few expert demonstrations, they require a prohibitively large number of interactions with the environment in order to imitate the expert for many real-world applications. In order to address these issues, we propose a new algorithm c"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1809.02925","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:03:24Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"0cATGRty5N+zfsm/1wJlEejv+ixQ+bntChRe05b9ljNn2X7xqqyXaZIKfGOj7KTwZe5Tt7SEh5JaWgBiqT0hCw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-27T10:20:27.325503Z"},"content_sha256":"f6463a6ea3a8c17f7b1d1ef8e4f11dca504b1e6b64b3b3c74a90b7d56c3fb4a8","schema_version":"1.0","event_id":"sha256:f6463a6ea3a8c17f7b1d1ef8e4f11dca504b1e6b64b3b3c74a90b7d56c3fb4a8"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/42ELHNGZUMMGK5CUU6FE2EU6LH/bundle.json","state_url":"https://pith.science/pith/42ELHNGZUMMGK5CUU6FE2EU6LH/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/42ELHNGZUMMGK5CUU6FE2EU6LH/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-27T10:20:27Z","links":{"resolver":"https://pith.science/pith/42ELHNGZUMMGK5CUU6FE2EU6LH","bundle":"https://pith.science/pith/42ELHNGZUMMGK5CUU6FE2EU6LH/bundle.json","state":"https://pith.science/pith/42ELHNGZUMMGK5CUU6FE2EU6LH/state.json","well_known_bundle":"https://pith.science/.well-known/pith/42ELHNGZUMMGK5CUU6FE2EU6LH/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2018:42ELHNGZUMMGK5CUU6FE2EU6LH","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"dce6ee98ccf169cef064a1760a18d643dcba2abd994c9db1b80bfc7ba82621e6","cross_cats_sorted":["stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-09-09T05:37:25Z","title_canon_sha256":"af8dc7fc6113484b56a0a5bb582018f086b0ddb13b43bc56060a8df7b0dbece7"},"schema_version":"1.0","source":{"id":"1809.02925","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1809.02925","created_at":"2026-05-18T00:03:24Z"},{"alias_kind":"arxiv_version","alias_value":"1809.02925v2","created_at":"2026-05-18T00:03:24Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1809.02925","created_at":"2026-05-18T00:03:24Z"},{"alias_kind":"pith_short_12","alias_value":"42ELHNGZUMMG","created_at":"2026-05-18T12:32:05Z"},{"alias_kind":"pith_short_16","alias_value":"42ELHNGZUMMGK5CU","created_at":"2026-05-18T12:32:05Z"},{"alias_kind":"pith_short_8","alias_value":"42ELHNGZ","created_at":"2026-05-18T12:32:05Z"}],"graph_snapshots":[{"event_id":"sha256:f6463a6ea3a8c17f7b1d1ef8e4f11dca504b1e6b64b3b3c74a90b7d56c3fb4a8","target":"graph","created_at":"2026-05-18T00:03:24Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"We identify two issues with the family of algorithms based on the Adversarial Imitation Learning framework. The first problem is implicit bias present in the reward functions used in these algorithms. While these biases might work well for some environments, they can also lead to sub-optimal behavior in others. Secondly, even though these algorithms can learn from few expert demonstrations, they require a prohibitively large number of interactions with the environment in order to imitate the expert for many real-world applications. In order to address these issues, we propose a new algorithm c","authors_text":"Debidatta Dwibedi, Ilya Kostrikov, Jonathan Tompson, Kumar Krishna Agrawal, Sergey Levine","cross_cats":["stat.ML"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-09-09T05:37:25Z","title":"Discriminator-Actor-Critic: Addressing Sample Inefficiency and Reward Bias in Adversarial Imitation Learning"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1809.02925","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:bd1fab67950d01dd4a91f3e19b1a269072ce2e8873e4017437c6bf0c154ae2d8","target":"record","created_at":"2026-05-18T00:03:24Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"dce6ee98ccf169cef064a1760a18d643dcba2abd994c9db1b80bfc7ba82621e6","cross_cats_sorted":["stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-09-09T05:37:25Z","title_canon_sha256":"af8dc7fc6113484b56a0a5bb582018f086b0ddb13b43bc56060a8df7b0dbece7"},"schema_version":"1.0","source":{"id":"1809.02925","kind":"arxiv","version":2}},"canonical_sha256":"e688b3b4d9a318657454a78a4d129e59c4ce279df0ede04b6fa50b911d864970","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"e688b3b4d9a318657454a78a4d129e59c4ce279df0ede04b6fa50b911d864970","first_computed_at":"2026-05-18T00:03:24.132675Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:03:24.132675Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"Tr/RO55qSy285qx8GXhWFN7RxHX3YBZzi4uGApTbKa+J/ffEUdMiWDbt+cglmIDMIc1sVzp5ZgsKJ3IqDmVuAw==","signature_status":"signed_v1","signed_at":"2026-05-18T00:03:24.133333Z","signed_message":"canonical_sha256_bytes"},"source_id":"1809.02925","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:bd1fab67950d01dd4a91f3e19b1a269072ce2e8873e4017437c6bf0c154ae2d8","sha256:f6463a6ea3a8c17f7b1d1ef8e4f11dca504b1e6b64b3b3c74a90b7d56c3fb4a8"],"state_sha256":"0994918b408b33e0e6cbae5a7466fdf9526420b76f66a11d437b53d59a3c0495"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"2WBwTmOnKLbTFJ/wzd3fA8bfwG20b/YDAqp1ptgM31UPMflN7yOKmOnhS33GZbmHCCj96Q+CpKfz1EcAC3MWDA==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-27T10:20:27.328758Z","bundle_sha256":"85f8ccd5f76b2f76494fc9dcd14d1ae8592b54049caf4b3be0ea4380334ac0cd"}}