{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2019:WRG4A45OFUCXE3ZF7SKNUXFELN","short_pith_number":"pith:WRG4A45O","canonical_record":{"source":{"id":"1904.07854","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-04-16T17:59:23Z","cross_cats_sorted":["cs.CV","cs.RO","stat.ML"],"title_canon_sha256":"e25b013f47ce7c83f9d878b5a7081b5a574c4f6e371de0cb905e4165e2a24f31","abstract_canon_sha256":"99ebd438cbc5c83872146c5d7b9f5ed7779cb6b288d2a604978e88fb50f5acbf"},"schema_version":"1.0"},"canonical_sha256":"b44dc073ae2d05726f25fc94da5ca45b4ddc264411cdb16d561e75495c423edd","source":{"kind":"arxiv","id":"1904.07854","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1904.07854","created_at":"2026-05-17T23:46:03Z"},{"alias_kind":"arxiv_version","alias_value":"1904.07854v2","created_at":"2026-05-17T23:46:03Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1904.07854","created_at":"2026-05-17T23:46:03Z"},{"alias_kind":"pith_short_12","alias_value":"WRG4A45OFUCX","created_at":"2026-05-18T12:33:30Z"},{"alias_kind":"pith_short_16","alias_value":"WRG4A45OFUCXE3ZF","created_at":"2026-05-18T12:33:30Z"},{"alias_kind":"pith_short_8","alias_value":"WRG4A45O","created_at":"2026-05-18T12:33:30Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2019:WRG4A45OFUCXE3ZF7SKNUXFELN","target":"record","payload":{"canonical_record":{"source":{"id":"1904.07854","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-04-16T17:59:23Z","cross_cats_sorted":["cs.CV","cs.RO","stat.ML"],"title_canon_sha256":"e25b013f47ce7c83f9d878b5a7081b5a574c4f6e371de0cb905e4165e2a24f31","abstract_canon_sha256":"99ebd438cbc5c83872146c5d7b9f5ed7779cb6b288d2a604978e88fb50f5acbf"},"schema_version":"1.0"},"canonical_sha256":"b44dc073ae2d05726f25fc94da5ca45b4ddc264411cdb16d561e75495c423edd","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:46:03.202518Z","signature_b64":"AXbnuyVAxtm9BPnRjVXiaXfkf1tMbW8le8Axly+eZkTlJ3QDsXtj3trHWEpl0dDZkTP/xkSCLbA42ljuUnO8CQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"b44dc073ae2d05726f25fc94da5ca45b4ddc264411cdb16d561e75495c423edd","last_reissued_at":"2026-05-17T23:46:03.201799Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:46:03.201799Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1904.07854","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:46:03Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"0as+uXxj1M3mE1xbbf0xam7g2n2PX9jp0xl4PymdTT1Ah0nQKN3KBy8KwzKUjfs2QPVMWnU9xgPGRr+t4VydDQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-02T10:57:06.099051Z"},"content_sha256":"33e94b3201cb36132b07a18a5ce503b5226d2ad852eb76074374894143981225","schema_version":"1.0","event_id":"sha256:33e94b3201cb36132b07a18a5ce503b5226d2ad852eb76074374894143981225"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2019:WRG4A45OFUCXE3ZF7SKNUXFELN","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"End-to-End Robotic Reinforcement Learning without Reward Engineering","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.CV","cs.RO","stat.ML"],"primary_cat":"cs.LG","authors_text":"Avi Singh, Chelsea Finn, Kristian Hartikainen, Larry Yang, Sergey Levine","submitted_at":"2019-04-16T17:59:23Z","abstract_excerpt":"The combination of deep neural network models and reinforcement learning algorithms can make it possible to learn policies for robotic behaviors that directly read in raw sensory inputs, such as camera images, effectively subsuming both estimation and control into one model. However, real-world applications of reinforcement learning must specify the goal of the task by means of a manually programmed reward function, which in practice requires either designing the very same perception pipeline that end-to-end reinforcement learning promises to avoid, or else instrumenting the environment with a"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1904.07854","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:46:03Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"COVraNvS3YZGVtcsf8d3eyTEPFqBpzn09DIDcYqhoovSZUkgiVtJ0PAca4IaFYF2LgAP9uBEJ4uRaoFlIQNYCw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-02T10:57:06.099435Z"},"content_sha256":"907d8cef51651a2041ffc18bbe6704be7ef889c73fca58ad9860d642b53d4a1e","schema_version":"1.0","event_id":"sha256:907d8cef51651a2041ffc18bbe6704be7ef889c73fca58ad9860d642b53d4a1e"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/WRG4A45OFUCXE3ZF7SKNUXFELN/bundle.json","state_url":"https://pith.science/pith/WRG4A45OFUCXE3ZF7SKNUXFELN/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/WRG4A45OFUCXE3ZF7SKNUXFELN/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-02T10:57:06Z","links":{"resolver":"https://pith.science/pith/WRG4A45OFUCXE3ZF7SKNUXFELN","bundle":"https://pith.science/pith/WRG4A45OFUCXE3ZF7SKNUXFELN/bundle.json","state":"https://pith.science/pith/WRG4A45OFUCXE3ZF7SKNUXFELN/state.json","well_known_bundle":"https://pith.science/.well-known/pith/WRG4A45OFUCXE3ZF7SKNUXFELN/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2019:WRG4A45OFUCXE3ZF7SKNUXFELN","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"99ebd438cbc5c83872146c5d7b9f5ed7779cb6b288d2a604978e88fb50f5acbf","cross_cats_sorted":["cs.CV","cs.RO","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-04-16T17:59:23Z","title_canon_sha256":"e25b013f47ce7c83f9d878b5a7081b5a574c4f6e371de0cb905e4165e2a24f31"},"schema_version":"1.0","source":{"id":"1904.07854","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1904.07854","created_at":"2026-05-17T23:46:03Z"},{"alias_kind":"arxiv_version","alias_value":"1904.07854v2","created_at":"2026-05-17T23:46:03Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1904.07854","created_at":"2026-05-17T23:46:03Z"},{"alias_kind":"pith_short_12","alias_value":"WRG4A45OFUCX","created_at":"2026-05-18T12:33:30Z"},{"alias_kind":"pith_short_16","alias_value":"WRG4A45OFUCXE3ZF","created_at":"2026-05-18T12:33:30Z"},{"alias_kind":"pith_short_8","alias_value":"WRG4A45O","created_at":"2026-05-18T12:33:30Z"}],"graph_snapshots":[{"event_id":"sha256:907d8cef51651a2041ffc18bbe6704be7ef889c73fca58ad9860d642b53d4a1e","target":"graph","created_at":"2026-05-17T23:46:03Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"The combination of deep neural network models and reinforcement learning algorithms can make it possible to learn policies for robotic behaviors that directly read in raw sensory inputs, such as camera images, effectively subsuming both estimation and control into one model. However, real-world applications of reinforcement learning must specify the goal of the task by means of a manually programmed reward function, which in practice requires either designing the very same perception pipeline that end-to-end reinforcement learning promises to avoid, or else instrumenting the environment with a","authors_text":"Avi Singh, Chelsea Finn, Kristian Hartikainen, Larry Yang, Sergey Levine","cross_cats":["cs.CV","cs.RO","stat.ML"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-04-16T17:59:23Z","title":"End-to-End Robotic Reinforcement Learning without Reward Engineering"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1904.07854","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:33e94b3201cb36132b07a18a5ce503b5226d2ad852eb76074374894143981225","target":"record","created_at":"2026-05-17T23:46:03Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"99ebd438cbc5c83872146c5d7b9f5ed7779cb6b288d2a604978e88fb50f5acbf","cross_cats_sorted":["cs.CV","cs.RO","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-04-16T17:59:23Z","title_canon_sha256":"e25b013f47ce7c83f9d878b5a7081b5a574c4f6e371de0cb905e4165e2a24f31"},"schema_version":"1.0","source":{"id":"1904.07854","kind":"arxiv","version":2}},"canonical_sha256":"b44dc073ae2d05726f25fc94da5ca45b4ddc264411cdb16d561e75495c423edd","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"b44dc073ae2d05726f25fc94da5ca45b4ddc264411cdb16d561e75495c423edd","first_computed_at":"2026-05-17T23:46:03.201799Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:46:03.201799Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"AXbnuyVAxtm9BPnRjVXiaXfkf1tMbW8le8Axly+eZkTlJ3QDsXtj3trHWEpl0dDZkTP/xkSCLbA42ljuUnO8CQ==","signature_status":"signed_v1","signed_at":"2026-05-17T23:46:03.202518Z","signed_message":"canonical_sha256_bytes"},"source_id":"1904.07854","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:33e94b3201cb36132b07a18a5ce503b5226d2ad852eb76074374894143981225","sha256:907d8cef51651a2041ffc18bbe6704be7ef889c73fca58ad9860d642b53d4a1e"],"state_sha256":"d801e564904cafc538228f203f1846ff3effb06a8d7e8b7793688649552de364"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"WkfGJFHrCil4RzrGvVe/fW2S5eNBn2zaQzDUKLGtRQ/fv6C1cg+yxt0jgp6WsFBbQX3+lhCqAJyNxi+TagX1CA==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-02T10:57:06.101572Z","bundle_sha256":"614db6b6e7a359584869e1b62663c5949c8bf078b321beb599d8a21c9396d673"}}