{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2019:MR2JYF4QG2JN5NDCCG7XLZUETH","short_pith_number":"pith:MR2JYF4Q","canonical_record":{"source":{"id":"1901.05101","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-01-16T01:20:00Z","cross_cats_sorted":["cs.AI","cs.RO","stat.ML"],"title_canon_sha256":"1efaa723c8a368491347bc6f7e3bebcebf367b763fafea9a60dca926ca26ce8e","abstract_canon_sha256":"c6c751f0c302f5e1d782b642dd786416533a7a9d78f8cab99be10654fa470fab"},"schema_version":"1.0"},"canonical_sha256":"64749c17903692deb46211bf75e68499eaa288d7bee3c71b790f62972d75e890","source":{"kind":"arxiv","id":"1901.05101","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1901.05101","created_at":"2026-05-17T23:56:11Z"},{"alias_kind":"arxiv_version","alias_value":"1901.05101v1","created_at":"2026-05-17T23:56:11Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1901.05101","created_at":"2026-05-17T23:56:11Z"},{"alias_kind":"pith_short_12","alias_value":"MR2JYF4QG2JN","created_at":"2026-05-18T12:33:21Z"},{"alias_kind":"pith_short_16","alias_value":"MR2JYF4QG2JN5NDC","created_at":"2026-05-18T12:33:21Z"},{"alias_kind":"pith_short_8","alias_value":"MR2JYF4Q","created_at":"2026-05-18T12:33:21Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2019:MR2JYF4QG2JN5NDCCG7XLZUETH","target":"record","payload":{"canonical_record":{"source":{"id":"1901.05101","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-01-16T01:20:00Z","cross_cats_sorted":["cs.AI","cs.RO","stat.ML"],"title_canon_sha256":"1efaa723c8a368491347bc6f7e3bebcebf367b763fafea9a60dca926ca26ce8e","abstract_canon_sha256":"c6c751f0c302f5e1d782b642dd786416533a7a9d78f8cab99be10654fa470fab"},"schema_version":"1.0"},"canonical_sha256":"64749c17903692deb46211bf75e68499eaa288d7bee3c71b790f62972d75e890","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:56:11.950837Z","signature_b64":"WbWegDNx+zLCD0QCw6VLueMiTf9jjcAyYvzKVnjmWxrgvru3kWbAVX2ZEIiNcwpqHprJHRBfeZ+4Z71IfCg6AQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"64749c17903692deb46211bf75e68499eaa288d7bee3c71b790f62972d75e890","last_reissued_at":"2026-05-17T23:56:11.950228Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:56:11.950228Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1901.05101","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:56:11Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"0/h3rqNVWeKe5+kZZAHFRL9DPjP+RwYalqWrGLAEDtH459/s3DUfU91YqItW4PikyUwJ5g+WOwvYTSd2SmsjDg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-28T22:35:30.304290Z"},"content_sha256":"6d7b2aac2fe59c6d24a0cc8ce55151bb5357b2936011da30e238973653f7d8ff","schema_version":"1.0","event_id":"sha256:6d7b2aac2fe59c6d24a0cc8ce55151bb5357b2936011da30e238973653f7d8ff"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2019:MR2JYF4QG2JN5NDCCG7XLZUETH","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"ReNeg and Backseat Driver: Learning from Demonstration with Continuous Human Feedback","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","cs.RO","stat.ML"],"primary_cat":"cs.LG","authors_text":"Jacob Beck, Michael Littman, Zoe Papakipos","submitted_at":"2019-01-16T01:20:00Z","abstract_excerpt":"In autonomous vehicle (AV) control, allowing mistakes can be quite dangerous and costly in the real world. For this reason we investigate methods of training an AV without allowing the agent to explore and instead having a human explorer collect the data. Supervised learning has been explored for AV control, but it encounters the issue of the covariate shift. That is, training data collected from an optimal demonstration consists only of the states induced by the optimal control policy, but at runtime, the trained agent may encounter a vastly different state distribution with little relevant t"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1901.05101","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:56:11Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"vwWdsHuOr4qmQATCjUyrez6SJiPVo2RtXGvFWvJaTG+SebUAzHotAaSSQt5rHb9uDkdBnzFI5DjGxs9U1aPnDA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-28T22:35:30.305048Z"},"content_sha256":"118a26aeaac78dcae370280274561fd6d6ae5fd34b37fd2922527614cfed713b","schema_version":"1.0","event_id":"sha256:118a26aeaac78dcae370280274561fd6d6ae5fd34b37fd2922527614cfed713b"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/MR2JYF4QG2JN5NDCCG7XLZUETH/bundle.json","state_url":"https://pith.science/pith/MR2JYF4QG2JN5NDCCG7XLZUETH/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/MR2JYF4QG2JN5NDCCG7XLZUETH/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-28T22:35:30Z","links":{"resolver":"https://pith.science/pith/MR2JYF4QG2JN5NDCCG7XLZUETH","bundle":"https://pith.science/pith/MR2JYF4QG2JN5NDCCG7XLZUETH/bundle.json","state":"https://pith.science/pith/MR2JYF4QG2JN5NDCCG7XLZUETH/state.json","well_known_bundle":"https://pith.science/.well-known/pith/MR2JYF4QG2JN5NDCCG7XLZUETH/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2019:MR2JYF4QG2JN5NDCCG7XLZUETH","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"c6c751f0c302f5e1d782b642dd786416533a7a9d78f8cab99be10654fa470fab","cross_cats_sorted":["cs.AI","cs.RO","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-01-16T01:20:00Z","title_canon_sha256":"1efaa723c8a368491347bc6f7e3bebcebf367b763fafea9a60dca926ca26ce8e"},"schema_version":"1.0","source":{"id":"1901.05101","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1901.05101","created_at":"2026-05-17T23:56:11Z"},{"alias_kind":"arxiv_version","alias_value":"1901.05101v1","created_at":"2026-05-17T23:56:11Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1901.05101","created_at":"2026-05-17T23:56:11Z"},{"alias_kind":"pith_short_12","alias_value":"MR2JYF4QG2JN","created_at":"2026-05-18T12:33:21Z"},{"alias_kind":"pith_short_16","alias_value":"MR2JYF4QG2JN5NDC","created_at":"2026-05-18T12:33:21Z"},{"alias_kind":"pith_short_8","alias_value":"MR2JYF4Q","created_at":"2026-05-18T12:33:21Z"}],"graph_snapshots":[{"event_id":"sha256:118a26aeaac78dcae370280274561fd6d6ae5fd34b37fd2922527614cfed713b","target":"graph","created_at":"2026-05-17T23:56:11Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"In autonomous vehicle (AV) control, allowing mistakes can be quite dangerous and costly in the real world. For this reason we investigate methods of training an AV without allowing the agent to explore and instead having a human explorer collect the data. Supervised learning has been explored for AV control, but it encounters the issue of the covariate shift. That is, training data collected from an optimal demonstration consists only of the states induced by the optimal control policy, but at runtime, the trained agent may encounter a vastly different state distribution with little relevant t","authors_text":"Jacob Beck, Michael Littman, Zoe Papakipos","cross_cats":["cs.AI","cs.RO","stat.ML"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-01-16T01:20:00Z","title":"ReNeg and Backseat Driver: Learning from Demonstration with Continuous Human Feedback"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1901.05101","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:6d7b2aac2fe59c6d24a0cc8ce55151bb5357b2936011da30e238973653f7d8ff","target":"record","created_at":"2026-05-17T23:56:11Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"c6c751f0c302f5e1d782b642dd786416533a7a9d78f8cab99be10654fa470fab","cross_cats_sorted":["cs.AI","cs.RO","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-01-16T01:20:00Z","title_canon_sha256":"1efaa723c8a368491347bc6f7e3bebcebf367b763fafea9a60dca926ca26ce8e"},"schema_version":"1.0","source":{"id":"1901.05101","kind":"arxiv","version":1}},"canonical_sha256":"64749c17903692deb46211bf75e68499eaa288d7bee3c71b790f62972d75e890","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"64749c17903692deb46211bf75e68499eaa288d7bee3c71b790f62972d75e890","first_computed_at":"2026-05-17T23:56:11.950228Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:56:11.950228Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"WbWegDNx+zLCD0QCw6VLueMiTf9jjcAyYvzKVnjmWxrgvru3kWbAVX2ZEIiNcwpqHprJHRBfeZ+4Z71IfCg6AQ==","signature_status":"signed_v1","signed_at":"2026-05-17T23:56:11.950837Z","signed_message":"canonical_sha256_bytes"},"source_id":"1901.05101","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:6d7b2aac2fe59c6d24a0cc8ce55151bb5357b2936011da30e238973653f7d8ff","sha256:118a26aeaac78dcae370280274561fd6d6ae5fd34b37fd2922527614cfed713b"],"state_sha256":"066a95e2725402c2615c5edf5138312d65737e2a3cb5ddda9309a316caff5b7a"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"28Z43CD8vR/XapHS4VMcoOys12ZySDgY44MetcA2oEBIxUnzlhthOwUvOKfLzqZsHKKCBPmHuGhfXNWTqO2vCw==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-28T22:35:30.309206Z","bundle_sha256":"b587594bc10e7b6e035e6a79a76e166bc5e32037b1b91eb3eb35968f02f56404"}}