{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2018:VXM2BKV7YYCQDWWK67JZCWKQ6P","short_pith_number":"pith:VXM2BKV7","canonical_record":{"source":{"id":"1802.05313","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2018-02-14T20:37:38Z","cross_cats_sorted":["cs.LG","stat.ML"],"title_canon_sha256":"639c1af08e77f462cab2588e786eedd42118a45d66a00dd6b043ca9526f26c5d","abstract_canon_sha256":"a6ab0d1f8b0472cf8762d2099a08e135ff0c38151d379622d48a0bbad95fefba"},"schema_version":"1.0"},"canonical_sha256":"add9a0aabfc60501dacaf7d3915950f3d2833985de9603179f66430483fcf33c","source":{"kind":"arxiv","id":"1802.05313","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1802.05313","created_at":"2026-05-17T23:44:43Z"},{"alias_kind":"arxiv_version","alias_value":"1802.05313v2","created_at":"2026-05-17T23:44:43Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1802.05313","created_at":"2026-05-17T23:44:43Z"},{"alias_kind":"pith_short_12","alias_value":"VXM2BKV7YYCQ","created_at":"2026-05-18T12:32:59Z"},{"alias_kind":"pith_short_16","alias_value":"VXM2BKV7YYCQDWWK","created_at":"2026-05-18T12:32:59Z"},{"alias_kind":"pith_short_8","alias_value":"VXM2BKV7","created_at":"2026-05-18T12:32:59Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2018:VXM2BKV7YYCQDWWK67JZCWKQ6P","target":"record","payload":{"canonical_record":{"source":{"id":"1802.05313","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2018-02-14T20:37:38Z","cross_cats_sorted":["cs.LG","stat.ML"],"title_canon_sha256":"639c1af08e77f462cab2588e786eedd42118a45d66a00dd6b043ca9526f26c5d","abstract_canon_sha256":"a6ab0d1f8b0472cf8762d2099a08e135ff0c38151d379622d48a0bbad95fefba"},"schema_version":"1.0"},"canonical_sha256":"add9a0aabfc60501dacaf7d3915950f3d2833985de9603179f66430483fcf33c","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:44:43.424668Z","signature_b64":"hVtw9c554+TCs5pqjdb0WJDw2fkRq2Kh83dxbq9IH4HvhVbvAWhkZg1NO5X9pLsiKO9VZBM/UTHbg9HR1C17BQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"add9a0aabfc60501dacaf7d3915950f3d2833985de9603179f66430483fcf33c","last_reissued_at":"2026-05-17T23:44:43.424202Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:44:43.424202Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1802.05313","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:44:43Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"K+JWXsaMrVUmdk9MsTGHXYt3s+xs0+Nm7hxyp1rhTsURTSmAmiRfSv+lJFr86xcW4Ph9djLsPhELtzvFVt4XDA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-31T21:14:16.695958Z"},"content_sha256":"d48c1c032a5c9d2a8ae9d726b643b632af58a0db66109cff7a6ffff950671c65","schema_version":"1.0","event_id":"sha256:d48c1c032a5c9d2a8ae9d726b643b632af58a0db66109cff7a6ffff950671c65"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2018:VXM2BKV7YYCQDWWK67JZCWKQ6P","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Reinforcement Learning from Imperfect Demonstrations","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.LG","stat.ML"],"primary_cat":"cs.AI","authors_text":"Fisher Yu, Huazhe Xu, Ji Lin, Sergey Levine, Trevor Darrell, Yang Gao","submitted_at":"2018-02-14T20:37:38Z","abstract_excerpt":"Robust real-world learning should benefit from both demonstrations and interactions with the environment. Current approaches to learning from demonstration and reward perform supervised learning on expert demonstration data and use reinforcement learning to further improve performance based on the reward received from the environment. These tasks have divergent losses which are difficult to jointly optimize and such methods can be very sensitive to noisy demonstrations. We propose a unified reinforcement learning algorithm, Normalized Actor-Critic (NAC), that effectively normalizes the Q-funct"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1802.05313","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:44:43Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"ffDfRQ8buaHbySBp7iLDkGUqn6WiHbN4NNv2xPHPbFF/y19KHQnEXqKw0PXqxqtufGdviN9L+xhi3NZpY33eCA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-31T21:14:16.696564Z"},"content_sha256":"30f111ee91fc651b480ae4440013842646215add53cb933ce1b43f5fdd8946c3","schema_version":"1.0","event_id":"sha256:30f111ee91fc651b480ae4440013842646215add53cb933ce1b43f5fdd8946c3"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/VXM2BKV7YYCQDWWK67JZCWKQ6P/bundle.json","state_url":"https://pith.science/pith/VXM2BKV7YYCQDWWK67JZCWKQ6P/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/VXM2BKV7YYCQDWWK67JZCWKQ6P/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-31T21:14:16Z","links":{"resolver":"https://pith.science/pith/VXM2BKV7YYCQDWWK67JZCWKQ6P","bundle":"https://pith.science/pith/VXM2BKV7YYCQDWWK67JZCWKQ6P/bundle.json","state":"https://pith.science/pith/VXM2BKV7YYCQDWWK67JZCWKQ6P/state.json","well_known_bundle":"https://pith.science/.well-known/pith/VXM2BKV7YYCQDWWK67JZCWKQ6P/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2018:VXM2BKV7YYCQDWWK67JZCWKQ6P","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"a6ab0d1f8b0472cf8762d2099a08e135ff0c38151d379622d48a0bbad95fefba","cross_cats_sorted":["cs.LG","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2018-02-14T20:37:38Z","title_canon_sha256":"639c1af08e77f462cab2588e786eedd42118a45d66a00dd6b043ca9526f26c5d"},"schema_version":"1.0","source":{"id":"1802.05313","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1802.05313","created_at":"2026-05-17T23:44:43Z"},{"alias_kind":"arxiv_version","alias_value":"1802.05313v2","created_at":"2026-05-17T23:44:43Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1802.05313","created_at":"2026-05-17T23:44:43Z"},{"alias_kind":"pith_short_12","alias_value":"VXM2BKV7YYCQ","created_at":"2026-05-18T12:32:59Z"},{"alias_kind":"pith_short_16","alias_value":"VXM2BKV7YYCQDWWK","created_at":"2026-05-18T12:32:59Z"},{"alias_kind":"pith_short_8","alias_value":"VXM2BKV7","created_at":"2026-05-18T12:32:59Z"}],"graph_snapshots":[{"event_id":"sha256:30f111ee91fc651b480ae4440013842646215add53cb933ce1b43f5fdd8946c3","target":"graph","created_at":"2026-05-17T23:44:43Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Robust real-world learning should benefit from both demonstrations and interactions with the environment. Current approaches to learning from demonstration and reward perform supervised learning on expert demonstration data and use reinforcement learning to further improve performance based on the reward received from the environment. These tasks have divergent losses which are difficult to jointly optimize and such methods can be very sensitive to noisy demonstrations. We propose a unified reinforcement learning algorithm, Normalized Actor-Critic (NAC), that effectively normalizes the Q-funct","authors_text":"Fisher Yu, Huazhe Xu, Ji Lin, Sergey Levine, Trevor Darrell, Yang Gao","cross_cats":["cs.LG","stat.ML"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2018-02-14T20:37:38Z","title":"Reinforcement Learning from Imperfect Demonstrations"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1802.05313","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:d48c1c032a5c9d2a8ae9d726b643b632af58a0db66109cff7a6ffff950671c65","target":"record","created_at":"2026-05-17T23:44:43Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"a6ab0d1f8b0472cf8762d2099a08e135ff0c38151d379622d48a0bbad95fefba","cross_cats_sorted":["cs.LG","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2018-02-14T20:37:38Z","title_canon_sha256":"639c1af08e77f462cab2588e786eedd42118a45d66a00dd6b043ca9526f26c5d"},"schema_version":"1.0","source":{"id":"1802.05313","kind":"arxiv","version":2}},"canonical_sha256":"add9a0aabfc60501dacaf7d3915950f3d2833985de9603179f66430483fcf33c","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"add9a0aabfc60501dacaf7d3915950f3d2833985de9603179f66430483fcf33c","first_computed_at":"2026-05-17T23:44:43.424202Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:44:43.424202Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"hVtw9c554+TCs5pqjdb0WJDw2fkRq2Kh83dxbq9IH4HvhVbvAWhkZg1NO5X9pLsiKO9VZBM/UTHbg9HR1C17BQ==","signature_status":"signed_v1","signed_at":"2026-05-17T23:44:43.424668Z","signed_message":"canonical_sha256_bytes"},"source_id":"1802.05313","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:d48c1c032a5c9d2a8ae9d726b643b632af58a0db66109cff7a6ffff950671c65","sha256:30f111ee91fc651b480ae4440013842646215add53cb933ce1b43f5fdd8946c3"],"state_sha256":"c81571ad68b206da0a1daf2b76599aced0fca59fe1bb15084932fe34c4d644bf"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"EnScc6ig4nS4667nViuaEp/ZhJizc7IWnGzM+9BcWmi2bJmXiuFewbeugwe2S1yILjH63/hyWCicK0Q13eeIAg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-31T21:14:16.699756Z","bundle_sha256":"18f58f1c552da2fac71460934fe89fb505325688db9050d6465a4d661502d935"}}