{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2018:MMRBVIC6MMVSSQTJBU4H7VQ3WY","short_pith_number":"pith:MMRBVIC6","canonical_record":{"source":{"id":"1803.05402","kind":"arxiv","version":5},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2018-03-14T16:59:17Z","cross_cats_sorted":["cs.LG","stat.ML"],"title_canon_sha256":"a4b8292a159ce3f4ea29bcee8bf88ce5bcd633ca6313176866adea6411416811","abstract_canon_sha256":"d38669087336242360f25d0fb90d90c0db2dd2d5b6e462c3e367d6fd814eb188"},"schema_version":"1.0"},"canonical_sha256":"63221aa05e632b2942690d387fd61bb623f164204a2cae3ff65417b8dcf7a00f","source":{"kind":"arxiv","id":"1803.05402","version":5},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1803.05402","created_at":"2026-05-18T00:06:25Z"},{"alias_kind":"arxiv_version","alias_value":"1803.05402v5","created_at":"2026-05-18T00:06:25Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1803.05402","created_at":"2026-05-18T00:06:25Z"},{"alias_kind":"pith_short_12","alias_value":"MMRBVIC6MMVS","created_at":"2026-05-18T12:32:37Z"},{"alias_kind":"pith_short_16","alias_value":"MMRBVIC6MMVSSQTJ","created_at":"2026-05-18T12:32:37Z"},{"alias_kind":"pith_short_8","alias_value":"MMRBVIC6","created_at":"2026-05-18T12:32:37Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2018:MMRBVIC6MMVSSQTJBU4H7VQ3WY","target":"record","payload":{"canonical_record":{"source":{"id":"1803.05402","kind":"arxiv","version":5},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2018-03-14T16:59:17Z","cross_cats_sorted":["cs.LG","stat.ML"],"title_canon_sha256":"a4b8292a159ce3f4ea29bcee8bf88ce5bcd633ca6313176866adea6411416811","abstract_canon_sha256":"d38669087336242360f25d0fb90d90c0db2dd2d5b6e462c3e367d6fd814eb188"},"schema_version":"1.0"},"canonical_sha256":"63221aa05e632b2942690d387fd61bb623f164204a2cae3ff65417b8dcf7a00f","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:06:25.090937Z","signature_b64":"sFiPvGjMymZLvew81W7rTi6Zm4DNlfjdJlIpKAEZ+TkoQHBilLQilbfuy2lwqxbgv39mjd8FAzto7ykZeG6XBA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"63221aa05e632b2942690d387fd61bb623f164204a2cae3ff65417b8dcf7a00f","last_reissued_at":"2026-05-18T00:06:25.090218Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:06:25.090218Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1803.05402","source_version":5,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:06:25Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"JvPbVZbgqSzoIOlBzKCAB+qG43IiSZMFzIQkylY5wNsV3Xq6/io/69XouPY8YxpaYHZQ30TaIW2P3hFlvHBHBg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-05T00:14:38.816988Z"},"content_sha256":"6c80da0ae2148fa08def6c8ea4bbf8713d0e7e4c5312a037aa79f561af10096e","schema_version":"1.0","event_id":"sha256:6c80da0ae2148fa08def6c8ea4bbf8713d0e7e4c5312a037aa79f561af10096e"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2018:MMRBVIC6MMVSSQTJBU4H7VQ3WY","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Imitation Learning with Concurrent Actions in 3D Games","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.LG","stat.ML"],"primary_cat":"cs.AI","authors_text":"Henrik Holst, Jack Harmer, Joakim Bergdahl, Jorge del Val, Kristoffer Sj\\\"o\\\"o, Linus Gissl\\'en, Magnus Nordin, Tom Olsson","submitted_at":"2018-03-14T16:59:17Z","abstract_excerpt":"In this work we describe a novel deep reinforcement learning architecture that allows multiple actions to be selected at every time-step in an efficient manner. Multi-action policies allow complex behaviours to be learnt that would otherwise be hard to achieve when using single action selection techniques. We use both imitation learning and temporal difference (TD) reinforcement learning (RL) to provide a 4x improvement in training time and 2.5x improvement in performance over single action selection TD RL. We demonstrate the capabilities of this network using a complex in-house 3D game. Mimic"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1803.05402","kind":"arxiv","version":5},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:06:25Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"25c0cx89ZspTNybEmUB0QRqt4AQB4IhXayiwz6B0N3draoXbWxKO1sQxnupAsCKg5OHWkSE4ztkwfnu5DugCBA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-05T00:14:38.817359Z"},"content_sha256":"44dca6640cd6fa88fd934070f6a44d8e77a5e120d02e3a9bb0931c0e45aab563","schema_version":"1.0","event_id":"sha256:44dca6640cd6fa88fd934070f6a44d8e77a5e120d02e3a9bb0931c0e45aab563"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/MMRBVIC6MMVSSQTJBU4H7VQ3WY/bundle.json","state_url":"https://pith.science/pith/MMRBVIC6MMVSSQTJBU4H7VQ3WY/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/MMRBVIC6MMVSSQTJBU4H7VQ3WY/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-05T00:14:38Z","links":{"resolver":"https://pith.science/pith/MMRBVIC6MMVSSQTJBU4H7VQ3WY","bundle":"https://pith.science/pith/MMRBVIC6MMVSSQTJBU4H7VQ3WY/bundle.json","state":"https://pith.science/pith/MMRBVIC6MMVSSQTJBU4H7VQ3WY/state.json","well_known_bundle":"https://pith.science/.well-known/pith/MMRBVIC6MMVSSQTJBU4H7VQ3WY/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2018:MMRBVIC6MMVSSQTJBU4H7VQ3WY","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"d38669087336242360f25d0fb90d90c0db2dd2d5b6e462c3e367d6fd814eb188","cross_cats_sorted":["cs.LG","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2018-03-14T16:59:17Z","title_canon_sha256":"a4b8292a159ce3f4ea29bcee8bf88ce5bcd633ca6313176866adea6411416811"},"schema_version":"1.0","source":{"id":"1803.05402","kind":"arxiv","version":5}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1803.05402","created_at":"2026-05-18T00:06:25Z"},{"alias_kind":"arxiv_version","alias_value":"1803.05402v5","created_at":"2026-05-18T00:06:25Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1803.05402","created_at":"2026-05-18T00:06:25Z"},{"alias_kind":"pith_short_12","alias_value":"MMRBVIC6MMVS","created_at":"2026-05-18T12:32:37Z"},{"alias_kind":"pith_short_16","alias_value":"MMRBVIC6MMVSSQTJ","created_at":"2026-05-18T12:32:37Z"},{"alias_kind":"pith_short_8","alias_value":"MMRBVIC6","created_at":"2026-05-18T12:32:37Z"}],"graph_snapshots":[{"event_id":"sha256:44dca6640cd6fa88fd934070f6a44d8e77a5e120d02e3a9bb0931c0e45aab563","target":"graph","created_at":"2026-05-18T00:06:25Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"In this work we describe a novel deep reinforcement learning architecture that allows multiple actions to be selected at every time-step in an efficient manner. Multi-action policies allow complex behaviours to be learnt that would otherwise be hard to achieve when using single action selection techniques. We use both imitation learning and temporal difference (TD) reinforcement learning (RL) to provide a 4x improvement in training time and 2.5x improvement in performance over single action selection TD RL. We demonstrate the capabilities of this network using a complex in-house 3D game. Mimic","authors_text":"Henrik Holst, Jack Harmer, Joakim Bergdahl, Jorge del Val, Kristoffer Sj\\\"o\\\"o, Linus Gissl\\'en, Magnus Nordin, Tom Olsson","cross_cats":["cs.LG","stat.ML"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2018-03-14T16:59:17Z","title":"Imitation Learning with Concurrent Actions in 3D Games"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1803.05402","kind":"arxiv","version":5},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:6c80da0ae2148fa08def6c8ea4bbf8713d0e7e4c5312a037aa79f561af10096e","target":"record","created_at":"2026-05-18T00:06:25Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"d38669087336242360f25d0fb90d90c0db2dd2d5b6e462c3e367d6fd814eb188","cross_cats_sorted":["cs.LG","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2018-03-14T16:59:17Z","title_canon_sha256":"a4b8292a159ce3f4ea29bcee8bf88ce5bcd633ca6313176866adea6411416811"},"schema_version":"1.0","source":{"id":"1803.05402","kind":"arxiv","version":5}},"canonical_sha256":"63221aa05e632b2942690d387fd61bb623f164204a2cae3ff65417b8dcf7a00f","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"63221aa05e632b2942690d387fd61bb623f164204a2cae3ff65417b8dcf7a00f","first_computed_at":"2026-05-18T00:06:25.090218Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:06:25.090218Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"sFiPvGjMymZLvew81W7rTi6Zm4DNlfjdJlIpKAEZ+TkoQHBilLQilbfuy2lwqxbgv39mjd8FAzto7ykZeG6XBA==","signature_status":"signed_v1","signed_at":"2026-05-18T00:06:25.090937Z","signed_message":"canonical_sha256_bytes"},"source_id":"1803.05402","source_kind":"arxiv","source_version":5}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:6c80da0ae2148fa08def6c8ea4bbf8713d0e7e4c5312a037aa79f561af10096e","sha256:44dca6640cd6fa88fd934070f6a44d8e77a5e120d02e3a9bb0931c0e45aab563"],"state_sha256":"5ece6cc352d79cd90b87e996a22110799a3b047623ada243c21b2ad83e428764"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"V+FuIweamQ+JmDp9+WJGeoiukSbf2Kx8C9ldxCpjkAmHD/pqXyuLWFpmdZMHssZibE7irGOiQfz0pPp7EviPCA==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-05T00:14:38.819355Z","bundle_sha256":"312b28ccae7d36a3d25dc1e88ac33dd92db325b3438c1292d73f2be472b7dcdb"}}