{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2018:LS3J7FUUH2YOQR25JWRIQOIFRV","short_pith_number":"pith:LS3J7FUU","canonical_record":{"source":{"id":"1806.04562","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-06-12T14:40:24Z","cross_cats_sorted":["cs.AI","stat.ML"],"title_canon_sha256":"2715287c87ed2ec75150781eb789ba84a14406b54e170371aba0946aafbd53b3","abstract_canon_sha256":"f058c66ac4ecbb7911bf9313891b249a386bab25f7a4b7ce5f1f48c419c3ab7c"},"schema_version":"1.0"},"canonical_sha256":"5cb69f96943eb0e8475d4da28839058d7b539b97da03103a336c351f82e21e44","source":{"kind":"arxiv","id":"1806.04562","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1806.04562","created_at":"2026-05-17T23:39:27Z"},{"alias_kind":"arxiv_version","alias_value":"1806.04562v2","created_at":"2026-05-17T23:39:27Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1806.04562","created_at":"2026-05-17T23:39:27Z"},{"alias_kind":"pith_short_12","alias_value":"LS3J7FUUH2YO","created_at":"2026-05-18T12:32:37Z"},{"alias_kind":"pith_short_16","alias_value":"LS3J7FUUH2YOQR25","created_at":"2026-05-18T12:32:37Z"},{"alias_kind":"pith_short_8","alias_value":"LS3J7FUU","created_at":"2026-05-18T12:32:37Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2018:LS3J7FUUH2YOQR25JWRIQOIFRV","target":"record","payload":{"canonical_record":{"source":{"id":"1806.04562","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-06-12T14:40:24Z","cross_cats_sorted":["cs.AI","stat.ML"],"title_canon_sha256":"2715287c87ed2ec75150781eb789ba84a14406b54e170371aba0946aafbd53b3","abstract_canon_sha256":"f058c66ac4ecbb7911bf9313891b249a386bab25f7a4b7ce5f1f48c419c3ab7c"},"schema_version":"1.0"},"canonical_sha256":"5cb69f96943eb0e8475d4da28839058d7b539b97da03103a336c351f82e21e44","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:39:27.431060Z","signature_b64":"Qa9GJ4BLNFmKXuxue9deOFnmcU6ReUM/LY5F6auuwAO/4TL3TK5zHQMyKKylyHGHqDIq6/Q35KpdEmglQ7reCw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"5cb69f96943eb0e8475d4da28839058d7b539b97da03103a336c351f82e21e44","last_reissued_at":"2026-05-17T23:39:27.430480Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:39:27.430480Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1806.04562","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:39:27Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"cozssz45JhYTzqtaqS3EDkSK/8NyH+oFPAkXl3OIlX6B8pGGw1pij51G0j7jpf1IRujuNn5sAXCR0kLD8HX5AA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-02T03:37:13.606670Z"},"content_sha256":"f485c3255c97f3152274dc6d909577cf2bcae91f855428d2b26279ed64bb506b","schema_version":"1.0","event_id":"sha256:f485c3255c97f3152274dc6d909577cf2bcae91f855428d2b26279ed64bb506b"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2018:LS3J7FUUH2YOQR25JWRIQOIFRV","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Multi-Agent Deep Reinforcement Learning with Human Strategies","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","stat.ML"],"primary_cat":"cs.LG","authors_text":"Ngoc Duy Nguyen, Saeid Nahavandi, Thanh Nguyen","submitted_at":"2018-06-12T14:40:24Z","abstract_excerpt":"Deep learning has enabled traditional reinforcement learning methods to deal with high-dimensional problems. However, one of the disadvantages of deep reinforcement learning methods is the limited exploration capacity of learning agents. In this paper, we introduce an approach that integrates human strategies to increase the exploration capacity of multiple deep reinforcement learning agents. We also report the development of our own multi-agent environment called Multiple Tank Defence to simulate the proposed approach. The results show the significant performance improvement of multiple agent"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1806.04562","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:39:27Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"aGkJMwbMy7Ls6KZgF24tx4dG8P6oTCeTOq0bzx/FDPgOxGaUP+jxAyXC+kcJB/7M1StqdDCbTBUgjwhr6vVlBg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-02T03:37:13.607002Z"},"content_sha256":"3dcd2ef9d6271b95442672b70ffc8cc589c53770244154c9624d0fa0d9259c4e","schema_version":"1.0","event_id":"sha256:3dcd2ef9d6271b95442672b70ffc8cc589c53770244154c9624d0fa0d9259c4e"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/LS3J7FUUH2YOQR25JWRIQOIFRV/bundle.json","state_url":"https://pith.science/pith/LS3J7FUUH2YOQR25JWRIQOIFRV/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/LS3J7FUUH2YOQR25JWRIQOIFRV/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-02T03:37:13Z","links":{"resolver":"https://pith.science/pith/LS3J7FUUH2YOQR25JWRIQOIFRV","bundle":"https://pith.science/pith/LS3J7FUUH2YOQR25JWRIQOIFRV/bundle.json","state":"https://pith.science/pith/LS3J7FUUH2YOQR25JWRIQOIFRV/state.json","well_known_bundle":"https://pith.science/.well-known/pith/LS3J7FUUH2YOQR25JWRIQOIFRV/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2018:LS3J7FUUH2YOQR25JWRIQOIFRV","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"f058c66ac4ecbb7911bf9313891b249a386bab25f7a4b7ce5f1f48c419c3ab7c","cross_cats_sorted":["cs.AI","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-06-12T14:40:24Z","title_canon_sha256":"2715287c87ed2ec75150781eb789ba84a14406b54e170371aba0946aafbd53b3"},"schema_version":"1.0","source":{"id":"1806.04562","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1806.04562","created_at":"2026-05-17T23:39:27Z"},{"alias_kind":"arxiv_version","alias_value":"1806.04562v2","created_at":"2026-05-17T23:39:27Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1806.04562","created_at":"2026-05-17T23:39:27Z"},{"alias_kind":"pith_short_12","alias_value":"LS3J7FUUH2YO","created_at":"2026-05-18T12:32:37Z"},{"alias_kind":"pith_short_16","alias_value":"LS3J7FUUH2YOQR25","created_at":"2026-05-18T12:32:37Z"},{"alias_kind":"pith_short_8","alias_value":"LS3J7FUU","created_at":"2026-05-18T12:32:37Z"}],"graph_snapshots":[{"event_id":"sha256:3dcd2ef9d6271b95442672b70ffc8cc589c53770244154c9624d0fa0d9259c4e","target":"graph","created_at":"2026-05-17T23:39:27Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Deep learning has enabled traditional reinforcement learning methods to deal with high-dimensional problems. However, one of the disadvantages of deep reinforcement learning methods is the limited exploration capacity of learning agents. In this paper, we introduce an approach that integrates human strategies to increase the exploration capacity of multiple deep reinforcement learning agents. We also report the development of our own multi-agent environment called Multiple Tank Defence to simulate the proposed approach. The results show the significant performance improvement of multiple agent","authors_text":"Ngoc Duy Nguyen, Saeid Nahavandi, Thanh Nguyen","cross_cats":["cs.AI","stat.ML"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-06-12T14:40:24Z","title":"Multi-Agent Deep Reinforcement Learning with Human Strategies"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1806.04562","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:f485c3255c97f3152274dc6d909577cf2bcae91f855428d2b26279ed64bb506b","target":"record","created_at":"2026-05-17T23:39:27Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"f058c66ac4ecbb7911bf9313891b249a386bab25f7a4b7ce5f1f48c419c3ab7c","cross_cats_sorted":["cs.AI","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-06-12T14:40:24Z","title_canon_sha256":"2715287c87ed2ec75150781eb789ba84a14406b54e170371aba0946aafbd53b3"},"schema_version":"1.0","source":{"id":"1806.04562","kind":"arxiv","version":2}},"canonical_sha256":"5cb69f96943eb0e8475d4da28839058d7b539b97da03103a336c351f82e21e44","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"5cb69f96943eb0e8475d4da28839058d7b539b97da03103a336c351f82e21e44","first_computed_at":"2026-05-17T23:39:27.430480Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:39:27.430480Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"Qa9GJ4BLNFmKXuxue9deOFnmcU6ReUM/LY5F6auuwAO/4TL3TK5zHQMyKKylyHGHqDIq6/Q35KpdEmglQ7reCw==","signature_status":"signed_v1","signed_at":"2026-05-17T23:39:27.431060Z","signed_message":"canonical_sha256_bytes"},"source_id":"1806.04562","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:f485c3255c97f3152274dc6d909577cf2bcae91f855428d2b26279ed64bb506b","sha256:3dcd2ef9d6271b95442672b70ffc8cc589c53770244154c9624d0fa0d9259c4e"],"state_sha256":"39130806c1fff9355d4a5b09abb889d66abf48297ac562c731333764c05551dc"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"lRbWWCawetMNPzKlNJEKnvFp6xkIJ6X73qq50zKbEZTd5COYgmgqaxMaqkT/Ygl7iTPoichMyciKymTizmjxDQ==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-02T03:37:13.608846Z","bundle_sha256":"4741e9805fbb297265b9f959ec592d6758a673ae9e44672b93c9fb21a2015f14"}}