{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2017:4NZFMGQ7U2ZXFQHTCI2A32DT5M","short_pith_number":"pith:4NZFMGQ7","schema_version":"1.0","canonical_sha256":"e372561a1fa6b372c0f312340de873eb30bd2d752e0ccb18445ea80540fa956d","source":{"kind":"arxiv","id":"1708.04782","version":1},"attestation_state":"computed","paper":{"title":"StarCraft II: A New Challenge for Reinforcement Learning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.LG","authors_text":"Alexander Sasha Vezhnevets, Alireza Makhzani, Anders Ekermo, Anthony Brunasso, David Lawrence, David Silver, Hado van Hasselt, Heinrich K\\\"uttler, Jacob Repp, John Agapiou, John Quan, Julian Schrittwieser, Karen Simonyan, Kevin Calderone, Michelle Yeo, Oriol Vinyals, Paul Keet, Petko Georgiev, Rodney Tsing, Sergey Bartunov, Stephen Gaffney, Stig Petersen, Timo Ewalds, Timothy Lillicrap, Tom Schaul","submitted_at":"2017-08-16T06:20:52Z","abstract_excerpt":"This paper introduces SC2LE (StarCraft II Learning Environment), a reinforcement learning environment based on the StarCraft II game. This domain poses a new grand challenge for reinforcement learning, representing a more difficult class of problems than considered in most prior work. It is a multi-agent problem with multiple players interacting; there is imperfect information due to a partially observed map; it has a large action space involving the selection and control of hundreds of units; it has a large state space that must be observed solely from raw input feature planes; and it has del"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1708.04782","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2017-08-16T06:20:52Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"f096c96be246fa790681614dd4f40552d3b452b7c4dc259ce6e904d933d841d8","abstract_canon_sha256":"6d368906bb57f708cc2244c29d2f3d9d6839fc70138530f1d297505e0aef7dd1"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:37:57.915364Z","signature_b64":"G40CJjqESBrHUYFQBX0z2QPuU7p2ZfmQ4w0qKIMYRQJDSQrb0IApjSTNryfEtw7vQ68YSCcehWqeiKP1uX+LBw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"e372561a1fa6b372c0f312340de873eb30bd2d752e0ccb18445ea80540fa956d","last_reissued_at":"2026-05-18T00:37:57.914812Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:37:57.914812Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"StarCraft II: A New Challenge for Reinforcement Learning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.LG","authors_text":"Alexander Sasha Vezhnevets, Alireza Makhzani, Anders Ekermo, Anthony Brunasso, David Lawrence, David Silver, Hado van Hasselt, Heinrich K\\\"uttler, Jacob Repp, John Agapiou, John Quan, Julian Schrittwieser, Karen Simonyan, Kevin Calderone, Michelle Yeo, Oriol Vinyals, Paul Keet, Petko Georgiev, Rodney Tsing, Sergey Bartunov, Stephen Gaffney, Stig Petersen, Timo Ewalds, Timothy Lillicrap, Tom Schaul","submitted_at":"2017-08-16T06:20:52Z","abstract_excerpt":"This paper introduces SC2LE (StarCraft II Learning Environment), a reinforcement learning environment based on the StarCraft II game. This domain poses a new grand challenge for reinforcement learning, representing a more difficult class of problems than considered in most prior work. It is a multi-agent problem with multiple players interacting; there is imperfect information due to a partially observed map; it has a large action space involving the selection and control of hundreds of units; it has a large state space that must be observed solely from raw input feature planes; and it has del"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1708.04782","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1708.04782","created_at":"2026-05-18T00:37:57.914897+00:00"},{"alias_kind":"arxiv_version","alias_value":"1708.04782v1","created_at":"2026-05-18T00:37:57.914897+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1708.04782","created_at":"2026-05-18T00:37:57.914897+00:00"},{"alias_kind":"pith_short_12","alias_value":"4NZFMGQ7U2ZX","created_at":"2026-05-18T12:31:00.734936+00:00"},{"alias_kind":"pith_short_16","alias_value":"4NZFMGQ7U2ZXFQHT","created_at":"2026-05-18T12:31:00.734936+00:00"},{"alias_kind":"pith_short_8","alias_value":"4NZFMGQ7","created_at":"2026-05-18T12:31:00.734936+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":14,"internal_anchor_count":10,"sample":[{"citing_arxiv_id":"1906.10124","citing_title":"On Multi-Agent Learning in Team Sports Games","ref_index":16,"is_internal_anchor":true},{"citing_arxiv_id":"1906.12213","citing_title":"On the notion of number in humans and machines","ref_index":67,"is_internal_anchor":true},{"citing_arxiv_id":"1906.12266","citing_title":"Growing Action Spaces","ref_index":14,"is_internal_anchor":true},{"citing_arxiv_id":"1907.09467","citing_title":"Arena: a toolkit for Multi-Agent Reinforcement Learning","ref_index":18,"is_internal_anchor":true},{"citing_arxiv_id":"1907.09273","citing_title":"Why Build an Assistant in Minecraft?","ref_index":87,"is_internal_anchor":true},{"citing_arxiv_id":"1912.01703","citing_title":"PyTorch: An Imperative Style, High-Performance Deep Learning Library","ref_index":28,"is_internal_anchor":true},{"citing_arxiv_id":"2502.13388","citing_title":"Reflection of Episodes: Learning to Play Game from Expert and Self Experiences","ref_index":1,"is_internal_anchor":true},{"citing_arxiv_id":"2506.21872","citing_title":"A Survey of Continual Reinforcement Learning","ref_index":83,"is_internal_anchor":true},{"citing_arxiv_id":"2511.15407","citing_title":"IPR-1: Interactive Physical Reasoner","ref_index":66,"is_internal_anchor":true},{"citing_arxiv_id":"2511.17925","citing_title":"Switch-JustDance: Benchmarking Whole Body Motion Tracking Controllers Using a Commercial Console Game","ref_index":36,"is_internal_anchor":true},{"citing_arxiv_id":"1911.01547","citing_title":"On the Measure of Intelligence","ref_index":93,"is_internal_anchor":false},{"citing_arxiv_id":"2604.08685","citing_title":"RAMP: Hybrid DRL for Online Learning of Numeric Action Models","ref_index":46,"is_internal_anchor":false},{"citing_arxiv_id":"2605.01862","citing_title":"QHyer: Q-conditioned Hybrid Attention-mamba Transformer for Offline Goal-conditioned RL","ref_index":236,"is_internal_anchor":false},{"citing_arxiv_id":"2605.07837","citing_title":"Approximation-Free Differentiable Oblique Decision Trees","ref_index":22,"is_internal_anchor":false}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/4NZFMGQ7U2ZXFQHTCI2A32DT5M","json":"https://pith.science/pith/4NZFMGQ7U2ZXFQHTCI2A32DT5M.json","graph_json":"https://pith.science/api/pith-number/4NZFMGQ7U2ZXFQHTCI2A32DT5M/graph.json","events_json":"https://pith.science/api/pith-number/4NZFMGQ7U2ZXFQHTCI2A32DT5M/events.json","paper":"https://pith.science/paper/4NZFMGQ7"},"agent_actions":{"view_html":"https://pith.science/pith/4NZFMGQ7U2ZXFQHTCI2A32DT5M","download_json":"https://pith.science/pith/4NZFMGQ7U2ZXFQHTCI2A32DT5M.json","view_paper":"https://pith.science/paper/4NZFMGQ7","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1708.04782&json=true","fetch_graph":"https://pith.science/api/pith-number/4NZFMGQ7U2ZXFQHTCI2A32DT5M/graph.json","fetch_events":"https://pith.science/api/pith-number/4NZFMGQ7U2ZXFQHTCI2A32DT5M/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/4NZFMGQ7U2ZXFQHTCI2A32DT5M/action/timestamp_anchor","attest_storage":"https://pith.science/pith/4NZFMGQ7U2ZXFQHTCI2A32DT5M/action/storage_attestation","attest_author":"https://pith.science/pith/4NZFMGQ7U2ZXFQHTCI2A32DT5M/action/author_attestation","sign_citation":"https://pith.science/pith/4NZFMGQ7U2ZXFQHTCI2A32DT5M/action/citation_signature","submit_replication":"https://pith.science/pith/4NZFMGQ7U2ZXFQHTCI2A32DT5M/action/replication_record"}},"created_at":"2026-05-18T00:37:57.914897+00:00","updated_at":"2026-05-18T00:37:57.914897+00:00"}