{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2019:WRG4A45OFUCXE3ZF7SKNUXFELN","short_pith_number":"pith:WRG4A45O","schema_version":"1.0","canonical_sha256":"b44dc073ae2d05726f25fc94da5ca45b4ddc264411cdb16d561e75495c423edd","source":{"kind":"arxiv","id":"1904.07854","version":2},"attestation_state":"computed","paper":{"title":"End-to-End Robotic Reinforcement Learning without Reward Engineering","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.CV","cs.RO","stat.ML"],"primary_cat":"cs.LG","authors_text":"Avi Singh, Chelsea Finn, Kristian Hartikainen, Larry Yang, Sergey Levine","submitted_at":"2019-04-16T17:59:23Z","abstract_excerpt":"The combination of deep neural network models and reinforcement learning algorithms can make it possible to learn policies for robotic behaviors that directly read in raw sensory inputs, such as camera images, effectively subsuming both estimation and control into one model. However, real-world applications of reinforcement learning must specify the goal of the task by means of a manually programmed reward function, which in practice requires either designing the very same perception pipeline that end-to-end reinforcement learning promises to avoid, or else instrumenting the environment with a"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1904.07854","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-04-16T17:59:23Z","cross_cats_sorted":["cs.CV","cs.RO","stat.ML"],"title_canon_sha256":"e25b013f47ce7c83f9d878b5a7081b5a574c4f6e371de0cb905e4165e2a24f31","abstract_canon_sha256":"99ebd438cbc5c83872146c5d7b9f5ed7779cb6b288d2a604978e88fb50f5acbf"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:46:03.202518Z","signature_b64":"AXbnuyVAxtm9BPnRjVXiaXfkf1tMbW8le8Axly+eZkTlJ3QDsXtj3trHWEpl0dDZkTP/xkSCLbA42ljuUnO8CQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"b44dc073ae2d05726f25fc94da5ca45b4ddc264411cdb16d561e75495c423edd","last_reissued_at":"2026-05-17T23:46:03.201799Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:46:03.201799Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"End-to-End Robotic Reinforcement Learning without Reward Engineering","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.CV","cs.RO","stat.ML"],"primary_cat":"cs.LG","authors_text":"Avi Singh, Chelsea Finn, Kristian Hartikainen, Larry Yang, Sergey Levine","submitted_at":"2019-04-16T17:59:23Z","abstract_excerpt":"The combination of deep neural network models and reinforcement learning algorithms can make it possible to learn policies for robotic behaviors that directly read in raw sensory inputs, such as camera images, effectively subsuming both estimation and control into one model. However, real-world applications of reinforcement learning must specify the goal of the task by means of a manually programmed reward function, which in practice requires either designing the very same perception pipeline that end-to-end reinforcement learning promises to avoid, or else instrumenting the environment with a"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1904.07854","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1904.07854","created_at":"2026-05-17T23:46:03.201905+00:00"},{"alias_kind":"arxiv_version","alias_value":"1904.07854v2","created_at":"2026-05-17T23:46:03.201905+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1904.07854","created_at":"2026-05-17T23:46:03.201905+00:00"},{"alias_kind":"pith_short_12","alias_value":"WRG4A45OFUCX","created_at":"2026-05-18T12:33:30.264802+00:00"},{"alias_kind":"pith_short_16","alias_value":"WRG4A45OFUCXE3ZF","created_at":"2026-05-18T12:33:30.264802+00:00"},{"alias_kind":"pith_short_8","alias_value":"WRG4A45O","created_at":"2026-05-18T12:33:30.264802+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":3,"internal_anchor_count":2,"sample":[{"citing_arxiv_id":"2504.11737","citing_title":"Hardware Co-Designed Optimal Control for Programmable Atomic Quantum Processors via Reinforcement Learning","ref_index":34,"is_internal_anchor":true},{"citing_arxiv_id":"2210.00030","citing_title":"VIP: Towards Universal Visual Reward and Representation via Value-Implicit Pre-Training","ref_index":29,"is_internal_anchor":true},{"citing_arxiv_id":"2108.03298","citing_title":"What Matters in Learning from Offline Human Demonstrations for Robot Manipulation","ref_index":64,"is_internal_anchor":false}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/WRG4A45OFUCXE3ZF7SKNUXFELN","json":"https://pith.science/pith/WRG4A45OFUCXE3ZF7SKNUXFELN.json","graph_json":"https://pith.science/api/pith-number/WRG4A45OFUCXE3ZF7SKNUXFELN/graph.json","events_json":"https://pith.science/api/pith-number/WRG4A45OFUCXE3ZF7SKNUXFELN/events.json","paper":"https://pith.science/paper/WRG4A45O"},"agent_actions":{"view_html":"https://pith.science/pith/WRG4A45OFUCXE3ZF7SKNUXFELN","download_json":"https://pith.science/pith/WRG4A45OFUCXE3ZF7SKNUXFELN.json","view_paper":"https://pith.science/paper/WRG4A45O","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1904.07854&json=true","fetch_graph":"https://pith.science/api/pith-number/WRG4A45OFUCXE3ZF7SKNUXFELN/graph.json","fetch_events":"https://pith.science/api/pith-number/WRG4A45OFUCXE3ZF7SKNUXFELN/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/WRG4A45OFUCXE3ZF7SKNUXFELN/action/timestamp_anchor","attest_storage":"https://pith.science/pith/WRG4A45OFUCXE3ZF7SKNUXFELN/action/storage_attestation","attest_author":"https://pith.science/pith/WRG4A45OFUCXE3ZF7SKNUXFELN/action/author_attestation","sign_citation":"https://pith.science/pith/WRG4A45OFUCXE3ZF7SKNUXFELN/action/citation_signature","submit_replication":"https://pith.science/pith/WRG4A45OFUCXE3ZF7SKNUXFELN/action/replication_record"}},"created_at":"2026-05-17T23:46:03.201905+00:00","updated_at":"2026-05-17T23:46:03.201905+00:00"}