{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2018:LFILXV3BOO73RTABK7HNJRNJ54","short_pith_number":"pith:LFILXV3B","schema_version":"1.0","canonical_sha256":"5950bbd76173bfb8cc0157ced4c5a9ef171730e95581f2cb61312f66c45a147d","source":{"kind":"arxiv","id":"1806.10293","version":3},"attestation_state":"computed","paper":{"title":"QT-Opt: Scalable Deep Reinforcement Learning for Vision-Based Robotic Manipulation","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","cs.CV","cs.RO","stat.ML"],"primary_cat":"cs.LG","authors_text":"Alexander Herzog, Alex Irpan, Deirdre Quillen, Dmitry Kalashnikov, Eric Jang, Ethan Holly, Julian Ibarz, Mrinal Kalakrishnan, Peter Pastor, Sergey Levine, Vincent Vanhoucke","submitted_at":"2018-06-27T04:34:30Z","abstract_excerpt":"In this paper, we study the problem of learning vision-based dynamic manipulation skills using a scalable reinforcement learning approach. We study this problem in the context of grasping, a longstanding challenge in robotic manipulation. In contrast to static learning behaviors that choose a grasp point and then execute the desired grasp, our method enables closed-loop vision-based control, whereby the robot continuously updates its grasp strategy based on the most recent observations to optimize long-horizon grasp success. To that end, we introduce QT-Opt, a scalable self-supervised vision-b"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1806.10293","kind":"arxiv","version":3},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-06-27T04:34:30Z","cross_cats_sorted":["cs.AI","cs.CV","cs.RO","stat.ML"],"title_canon_sha256":"d838e03fa1a7fd82ff1b77a79afb91a235d88797008c95a6565fbddf0a954fa0","abstract_canon_sha256":"bc71be13e337b79e3253f89afd9bc874d3edd4525dd4aa43752fe4558c2e29ff"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:59:44.639067Z","signature_b64":"oIvlzwcth5HQ5GpwD20ROTGywCPxywfN8GNNr9zHaUymwoys/eEWW6zKdjF2TLswrPZj/PO187VCu+jPDSRVAQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"5950bbd76173bfb8cc0157ced4c5a9ef171730e95581f2cb61312f66c45a147d","last_reissued_at":"2026-05-17T23:59:44.638710Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:59:44.638710Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"QT-Opt: Scalable Deep Reinforcement Learning for Vision-Based Robotic Manipulation","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","cs.CV","cs.RO","stat.ML"],"primary_cat":"cs.LG","authors_text":"Alexander Herzog, Alex Irpan, Deirdre Quillen, Dmitry Kalashnikov, Eric Jang, Ethan Holly, Julian Ibarz, Mrinal Kalakrishnan, Peter Pastor, Sergey Levine, Vincent Vanhoucke","submitted_at":"2018-06-27T04:34:30Z","abstract_excerpt":"In this paper, we study the problem of learning vision-based dynamic manipulation skills using a scalable reinforcement learning approach. We study this problem in the context of grasping, a longstanding challenge in robotic manipulation. In contrast to static learning behaviors that choose a grasp point and then execute the desired grasp, our method enables closed-loop vision-based control, whereby the robot continuously updates its grasp strategy based on the most recent observations to optimize long-horizon grasp success. To that end, we introduce QT-Opt, a scalable self-supervised vision-b"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1806.10293","kind":"arxiv","version":3},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1806.10293","created_at":"2026-05-17T23:59:44.638765+00:00"},{"alias_kind":"arxiv_version","alias_value":"1806.10293v3","created_at":"2026-05-17T23:59:44.638765+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1806.10293","created_at":"2026-05-17T23:59:44.638765+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":15,"internal_anchor_count":4,"sample":[{"citing_arxiv_id":"2505.03233","citing_title":"GraspVLA: a Grasping Foundation Model Pre-trained on Billion-scale Synthetic Action Data","ref_index":55,"is_internal_anchor":true},{"citing_arxiv_id":"2310.17596","citing_title":"MimicGen: A Data Generation System for Scalable Robot Learning using Human Demonstrations","ref_index":14,"is_internal_anchor":true},{"citing_arxiv_id":"2505.18719","citing_title":"VLA-RL: Towards Masterful and General Robotic Manipulation with Scalable Reinforcement Learning","ref_index":35,"is_internal_anchor":true},{"citing_arxiv_id":"2503.10631","citing_title":"HybridVLA: Collaborative Diffusion and Autoregression in a Unified Vision-Language-Action Model","ref_index":84,"is_internal_anchor":true},{"citing_arxiv_id":"2605.12090","citing_title":"World Action Models: The Next Frontier in Embodied AI","ref_index":118,"is_internal_anchor":false},{"citing_arxiv_id":"2406.02523","citing_title":"RoboCasa: Large-Scale Simulation of Everyday Tasks for Generalist Robots","ref_index":18,"is_internal_anchor":false},{"citing_arxiv_id":"2511.14759","citing_title":"$\\pi^{*}_{0.6}$: a VLA That Learns From Experience","ref_index":15,"is_internal_anchor":false},{"citing_arxiv_id":"2604.28192","citing_title":"LaST-R1: Reinforcing Robotic Manipulation via Adaptive Physical Latent Reasoning","ref_index":62,"is_internal_anchor":false},{"citing_arxiv_id":"2411.19650","citing_title":"CogACT: A Foundational Vision-Language-Action Model for Synergizing Cognition and Action in Robotic Manipulation","ref_index":27,"is_internal_anchor":false},{"citing_arxiv_id":"2604.28192","citing_title":"LaST-R1: Reinforcing Robotic Manipulation via Adaptive Physical Latent Reasoning","ref_index":62,"is_internal_anchor":false},{"citing_arxiv_id":"2310.08864","citing_title":"Open X-Embodiment: Robotic Learning Datasets and RT-X Models","ref_index":66,"is_internal_anchor":false},{"citing_arxiv_id":"2604.18000","citing_title":"Unmasking the Illusion of Embodied Reasoning in Vision-Language-Action Models","ref_index":60,"is_internal_anchor":false},{"citing_arxiv_id":"2403.12945","citing_title":"DROID: A Large-Scale In-The-Wild Robot Manipulation Dataset","ref_index":26,"is_internal_anchor":false},{"citing_arxiv_id":"2405.12213","citing_title":"Octo: An Open-Source Generalist Robot Policy","ref_index":42,"is_internal_anchor":false},{"citing_arxiv_id":"2406.09246","citing_title":"OpenVLA: An Open-Source Vision-Language-Action Model","ref_index":46,"is_internal_anchor":false}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/LFILXV3BOO73RTABK7HNJRNJ54","json":"https://pith.science/pith/LFILXV3BOO73RTABK7HNJRNJ54.json","graph_json":"https://pith.science/api/pith-number/LFILXV3BOO73RTABK7HNJRNJ54/graph.json","events_json":"https://pith.science/api/pith-number/LFILXV3BOO73RTABK7HNJRNJ54/events.json","paper":"https://pith.science/paper/LFILXV3B"},"agent_actions":{"view_html":"https://pith.science/pith/LFILXV3BOO73RTABK7HNJRNJ54","download_json":"https://pith.science/pith/LFILXV3BOO73RTABK7HNJRNJ54.json","view_paper":"https://pith.science/paper/LFILXV3B","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1806.10293&json=true","fetch_graph":"https://pith.science/api/pith-number/LFILXV3BOO73RTABK7HNJRNJ54/graph.json","fetch_events":"https://pith.science/api/pith-number/LFILXV3BOO73RTABK7HNJRNJ54/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/LFILXV3BOO73RTABK7HNJRNJ54/action/timestamp_anchor","attest_storage":"https://pith.science/pith/LFILXV3BOO73RTABK7HNJRNJ54/action/storage_attestation","attest_author":"https://pith.science/pith/LFILXV3BOO73RTABK7HNJRNJ54/action/author_attestation","sign_citation":"https://pith.science/pith/LFILXV3BOO73RTABK7HNJRNJ54/action/citation_signature","submit_replication":"https://pith.science/pith/LFILXV3BOO73RTABK7HNJRNJ54/action/replication_record"}},"created_at":"2026-05-17T23:59:44.638765+00:00","updated_at":"2026-05-17T23:59:44.638765+00:00"}