{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2021:PV7ZDPVRC6K3GG7TL7SRBPY4C6","short_pith_number":"pith:PV7ZDPVR","schema_version":"1.0","canonical_sha256":"7d7f91beb11795b31bf35fe510bf1c1798956522c752091587724771682b3323","source":{"kind":"arxiv","id":"2106.01345","version":2},"attestation_state":"computed","paper":{"title":"Decision Transformer: Reinforcement Learning via Sequence Modeling","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.LG","authors_text":"Aditya Grover, Aravind Rajeswaran, Aravind Srinivas, Igor Mordatch, Kevin Lu, Kimin Lee, Lili Chen, Michael Laskin, Pieter Abbeel","submitted_at":"2021-06-02T17:53:39Z","abstract_excerpt":"We introduce a framework that abstracts Reinforcement Learning (RL) as a sequence modeling problem. This allows us to draw upon the simplicity and scalability of the Transformer architecture, and associated advances in language modeling such as GPT-x and BERT. In particular, we present Decision Transformer, an architecture that casts the problem of RL as conditional sequence modeling. Unlike prior approaches to RL that fit value functions or compute policy gradients, Decision Transformer simply outputs the optimal actions by leveraging a causally masked Transformer. By conditioning an autoregr"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2106.01345","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2021-06-02T17:53:39Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"d58b305c53587d7a9b6b1e2d8667ae06316c83902a379f04d430a4f54999fd3c","abstract_canon_sha256":"ebd4efdd11afc5ee56f305abfbfac56bdbde63bbd62f5b7039ff8e2b3470b4f7"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T15:05:26.331147Z","signature_b64":"SuKb+d0EFs3k+FC1YLCO3kdih6Z0LD4YETGvHY5+UVIo7XjvT1Q0y9EUwbJrjEJ3bEqJFrOaDi5jmENa/YkpAg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"7d7f91beb11795b31bf35fe510bf1c1798956522c752091587724771682b3323","last_reissued_at":"2026-05-18T15:05:26.328089Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T15:05:26.328089Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Decision Transformer: Reinforcement Learning via Sequence Modeling","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.LG","authors_text":"Aditya Grover, Aravind Rajeswaran, Aravind Srinivas, Igor Mordatch, Kevin Lu, Kimin Lee, Lili Chen, Michael Laskin, Pieter Abbeel","submitted_at":"2021-06-02T17:53:39Z","abstract_excerpt":"We introduce a framework that abstracts Reinforcement Learning (RL) as a sequence modeling problem. This allows us to draw upon the simplicity and scalability of the Transformer architecture, and associated advances in language modeling such as GPT-x and BERT. In particular, we present Decision Transformer, an architecture that casts the problem of RL as conditional sequence modeling. Unlike prior approaches to RL that fit value functions or compute policy gradients, Decision Transformer simply outputs the optimal actions by leveraging a causally masked Transformer. By conditioning an autoregr"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2106.01345","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2106.01345","created_at":"2026-05-18T15:05:26.328220+00:00"},{"alias_kind":"arxiv_version","alias_value":"2106.01345v2","created_at":"2026-05-18T15:05:26.328220+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2106.01345","created_at":"2026-05-18T15:05:26.328220+00:00"},{"alias_kind":"pith_short_12","alias_value":"PV7ZDPVRC6K3","created_at":"2026-05-18T15:05:26.328220+00:00"},{"alias_kind":"pith_short_16","alias_value":"PV7ZDPVRC6K3GG7T","created_at":"2026-05-18T15:05:26.328220+00:00"},{"alias_kind":"pith_short_8","alias_value":"PV7ZDPVR","created_at":"2026-05-18T15:05:26.328220+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":20,"internal_anchor_count":20,"sample":[{"citing_arxiv_id":"2605.11151","citing_title":"RankQ: Offline-to-Online Reinforcement Learning via Self-Supervised Action Ranking","ref_index":24,"is_internal_anchor":true},{"citing_arxiv_id":"2605.02075","citing_title":"Graph Transformers and Stabilized Reinforcement Learning for Large-Scale Dynamic Routing Modulation and Spectrum Allocation in Elastic Optical Networks","ref_index":27,"is_internal_anchor":true},{"citing_arxiv_id":"2507.00432","citing_title":"Does Math Reasoning Improve General LLM Capabilities? Understanding Transferability of LLM Reasoning","ref_index":99,"is_internal_anchor":true},{"citing_arxiv_id":"2509.19538","citing_title":"DAWM: Diffusion Action World Models for Offline Reinforcement Learning via Action-Inferred Transitions","ref_index":3,"is_internal_anchor":true},{"citing_arxiv_id":"2402.05070","citing_title":"A Roadmap to Pluralistic Alignment","ref_index":296,"is_internal_anchor":true},{"citing_arxiv_id":"2505.18719","citing_title":"VLA-RL: Towards Masterful and General Robotic Manipulation with Scalable Reinforcement Learning","ref_index":11,"is_internal_anchor":true},{"citing_arxiv_id":"2310.10639","citing_title":"Zero-Shot Robotic Manipulation with Pretrained Image-Editing Diffusion Models","ref_index":10,"is_internal_anchor":true},{"citing_arxiv_id":"2605.14211","citing_title":"ASH: Agents that Self-Hone via Embodied Learning","ref_index":30,"is_internal_anchor":true},{"citing_arxiv_id":"2604.03190","citing_title":"Gradient Boosting within a Single Attention Layer","ref_index":1,"is_internal_anchor":true},{"citing_arxiv_id":"2108.03298","citing_title":"What Matters in Learning from Offline Human Demonstrations for Robot Manipulation","ref_index":50,"is_internal_anchor":true},{"citing_arxiv_id":"2211.09085","citing_title":"Galactica: A Large Language Model for Science","ref_index":11,"is_internal_anchor":true},{"citing_arxiv_id":"2605.11151","citing_title":"RankQ: Offline-to-Online Reinforcement Learning via Self-Supervised Action Ranking","ref_index":24,"is_internal_anchor":true},{"citing_arxiv_id":"2110.06169","citing_title":"Offline Reinforcement Learning with Implicit Q-Learning","ref_index":2,"is_internal_anchor":true},{"citing_arxiv_id":"2605.08404","citing_title":"Built Environment Reasoning from Remote Sensing Imagery Using Large Vision--Language Models","ref_index":71,"is_internal_anchor":true},{"citing_arxiv_id":"2112.00861","citing_title":"A General Language Assistant as a Laboratory for Alignment","ref_index":216,"is_internal_anchor":true},{"citing_arxiv_id":"2605.01694","citing_title":"Latent State Design for World Models under Sufficiency Constraints","ref_index":10,"is_internal_anchor":true},{"citing_arxiv_id":"2604.05859","citing_title":"When Do We Need LLMs? A Diagnostic for Language-Driven Bandits","ref_index":13,"is_internal_anchor":true},{"citing_arxiv_id":"2604.04662","citing_title":"Anticipatory Reinforcement Learning: From Generative Path-Laws to Distributional Value Functions","ref_index":6,"is_internal_anchor":true},{"citing_arxiv_id":"2605.02300","citing_title":"A Meta Reinforcement Learning Approach to Goals-Based Wealth Management","ref_index":43,"is_internal_anchor":true},{"citing_arxiv_id":"2605.02075","citing_title":"Graph Transformers and Stabilized Reinforcement Learning for Large-Scale Dynamic Routing Modulation and Spectrum Allocation in Elastic Optical Networks","ref_index":27,"is_internal_anchor":true}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/PV7ZDPVRC6K3GG7TL7SRBPY4C6","json":"https://pith.science/pith/PV7ZDPVRC6K3GG7TL7SRBPY4C6.json","graph_json":"https://pith.science/api/pith-number/PV7ZDPVRC6K3GG7TL7SRBPY4C6/graph.json","events_json":"https://pith.science/api/pith-number/PV7ZDPVRC6K3GG7TL7SRBPY4C6/events.json","paper":"https://pith.science/paper/PV7ZDPVR"},"agent_actions":{"view_html":"https://pith.science/pith/PV7ZDPVRC6K3GG7TL7SRBPY4C6","download_json":"https://pith.science/pith/PV7ZDPVRC6K3GG7TL7SRBPY4C6.json","view_paper":"https://pith.science/paper/PV7ZDPVR","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2106.01345&json=true","fetch_graph":"https://pith.science/api/pith-number/PV7ZDPVRC6K3GG7TL7SRBPY4C6/graph.json","fetch_events":"https://pith.science/api/pith-number/PV7ZDPVRC6K3GG7TL7SRBPY4C6/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/PV7ZDPVRC6K3GG7TL7SRBPY4C6/action/timestamp_anchor","attest_storage":"https://pith.science/pith/PV7ZDPVRC6K3GG7TL7SRBPY4C6/action/storage_attestation","attest_author":"https://pith.science/pith/PV7ZDPVRC6K3GG7TL7SRBPY4C6/action/author_attestation","sign_citation":"https://pith.science/pith/PV7ZDPVRC6K3GG7TL7SRBPY4C6/action/citation_signature","submit_replication":"https://pith.science/pith/PV7ZDPVRC6K3GG7TL7SRBPY4C6/action/replication_record"}},"created_at":"2026-05-18T15:05:26.328220+00:00","updated_at":"2026-05-18T15:05:26.328220+00:00"}