{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2019:YPS4WGHHG6VJKAQWADHRFYCLYH","short_pith_number":"pith:YPS4WGHH","canonical_record":{"source":{"id":"1906.02138","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2019-06-05T16:56:54Z","cross_cats_sorted":[],"title_canon_sha256":"979b325487ed886e67ce08950e503031d08b2b2bb6291f2c894c3b0fd8f5a01d","abstract_canon_sha256":"0e3d389fbecab223c8ac5d9dc19a326c58a6fd59450aaadbab32ad62fb4c2abd"},"schema_version":"1.0"},"canonical_sha256":"c3e5cb18e737aa95021600cf12e04bc1c17fe596632a8be0361afb93337aa41d","source":{"kind":"arxiv","id":"1906.02138","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1906.02138","created_at":"2026-05-17T23:44:05Z"},{"alias_kind":"arxiv_version","alias_value":"1906.02138v1","created_at":"2026-05-17T23:44:05Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1906.02138","created_at":"2026-05-17T23:44:05Z"},{"alias_kind":"pith_short_12","alias_value":"YPS4WGHHG6VJ","created_at":"2026-05-18T12:33:33Z"},{"alias_kind":"pith_short_16","alias_value":"YPS4WGHHG6VJKAQW","created_at":"2026-05-18T12:33:33Z"},{"alias_kind":"pith_short_8","alias_value":"YPS4WGHH","created_at":"2026-05-18T12:33:33Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2019:YPS4WGHHG6VJKAQWADHRFYCLYH","target":"record","payload":{"canonical_record":{"source":{"id":"1906.02138","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2019-06-05T16:56:54Z","cross_cats_sorted":[],"title_canon_sha256":"979b325487ed886e67ce08950e503031d08b2b2bb6291f2c894c3b0fd8f5a01d","abstract_canon_sha256":"0e3d389fbecab223c8ac5d9dc19a326c58a6fd59450aaadbab32ad62fb4c2abd"},"schema_version":"1.0"},"canonical_sha256":"c3e5cb18e737aa95021600cf12e04bc1c17fe596632a8be0361afb93337aa41d","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:44:05.188642Z","signature_b64":"Mv/D0cZQgbzsOk1GVXGPFj47XkP9U2gb0DZmERMokqbKlTqvfbn55uYFXSUN/Qh0weDnSj9Nc3Rz1169Zg7jCg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"c3e5cb18e737aa95021600cf12e04bc1c17fe596632a8be0361afb93337aa41d","last_reissued_at":"2026-05-17T23:44:05.188203Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:44:05.188203Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1906.02138","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:44:05Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"4m7rCDCFThvjG7GCOtBOkE2XHbmq8GXuNhIOFI2wndAWoeRmARDgr87eK09uxvsBpfVqTUeFrh9SX5Uf8sdSAw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-23T11:35:12.294380Z"},"content_sha256":"9ca23d7b13e089da69c2170e9287aac14943256739fe533a4033257b9e86b3ce","schema_version":"1.0","event_id":"sha256:9ca23d7b13e089da69c2170e9287aac14943256739fe533a4033257b9e86b3ce"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2019:YPS4WGHHG6VJKAQWADHRFYCLYH","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Exploration with Unreliable Intrinsic Reward in Multi-Agent Reinforcement Learning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.AI","authors_text":"Shimon Whiteson, Tabish Rashid, Wendelin B\\\"ohmer","submitted_at":"2019-06-05T16:56:54Z","abstract_excerpt":"This paper investigates the use of intrinsic reward to guide exploration in multi-agent reinforcement learning. We discuss the challenges in applying intrinsic reward to multiple collaborative agents and demonstrate how unreliable reward can prevent decentralized agents from learning the optimal policy. We address this problem with a novel framework, Independent Centrally-assisted Q-learning (ICQL), in which decentralized agents share control and an experience replay buffer with a centralized agent. Only the centralized agent is intrinsically rewarded, but the decentralized agents still benefi"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1906.02138","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:44:05Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"p+cZG8vCN0Mw8EwQga+nwD9KU3G3awq5/7Rvcj511OPAxGIrF9ZGHgVP1Y5Rgj0T5Y+Viw/Dtfy0DoQJpGR6Dw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-23T11:35:12.295119Z"},"content_sha256":"98240088f97e93d13e9b8d42338deb0ae4df68c903a52f87bd7a1841bc665459","schema_version":"1.0","event_id":"sha256:98240088f97e93d13e9b8d42338deb0ae4df68c903a52f87bd7a1841bc665459"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/YPS4WGHHG6VJKAQWADHRFYCLYH/bundle.json","state_url":"https://pith.science/pith/YPS4WGHHG6VJKAQWADHRFYCLYH/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/YPS4WGHHG6VJKAQWADHRFYCLYH/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-23T11:35:12Z","links":{"resolver":"https://pith.science/pith/YPS4WGHHG6VJKAQWADHRFYCLYH","bundle":"https://pith.science/pith/YPS4WGHHG6VJKAQWADHRFYCLYH/bundle.json","state":"https://pith.science/pith/YPS4WGHHG6VJKAQWADHRFYCLYH/state.json","well_known_bundle":"https://pith.science/.well-known/pith/YPS4WGHHG6VJKAQWADHRFYCLYH/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2019:YPS4WGHHG6VJKAQWADHRFYCLYH","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"0e3d389fbecab223c8ac5d9dc19a326c58a6fd59450aaadbab32ad62fb4c2abd","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2019-06-05T16:56:54Z","title_canon_sha256":"979b325487ed886e67ce08950e503031d08b2b2bb6291f2c894c3b0fd8f5a01d"},"schema_version":"1.0","source":{"id":"1906.02138","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1906.02138","created_at":"2026-05-17T23:44:05Z"},{"alias_kind":"arxiv_version","alias_value":"1906.02138v1","created_at":"2026-05-17T23:44:05Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1906.02138","created_at":"2026-05-17T23:44:05Z"},{"alias_kind":"pith_short_12","alias_value":"YPS4WGHHG6VJ","created_at":"2026-05-18T12:33:33Z"},{"alias_kind":"pith_short_16","alias_value":"YPS4WGHHG6VJKAQW","created_at":"2026-05-18T12:33:33Z"},{"alias_kind":"pith_short_8","alias_value":"YPS4WGHH","created_at":"2026-05-18T12:33:33Z"}],"graph_snapshots":[{"event_id":"sha256:98240088f97e93d13e9b8d42338deb0ae4df68c903a52f87bd7a1841bc665459","target":"graph","created_at":"2026-05-17T23:44:05Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"This paper investigates the use of intrinsic reward to guide exploration in multi-agent reinforcement learning. We discuss the challenges in applying intrinsic reward to multiple collaborative agents and demonstrate how unreliable reward can prevent decentralized agents from learning the optimal policy. We address this problem with a novel framework, Independent Centrally-assisted Q-learning (ICQL), in which decentralized agents share control and an experience replay buffer with a centralized agent. Only the centralized agent is intrinsically rewarded, but the decentralized agents still benefi","authors_text":"Shimon Whiteson, Tabish Rashid, Wendelin B\\\"ohmer","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2019-06-05T16:56:54Z","title":"Exploration with Unreliable Intrinsic Reward in Multi-Agent Reinforcement Learning"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1906.02138","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:9ca23d7b13e089da69c2170e9287aac14943256739fe533a4033257b9e86b3ce","target":"record","created_at":"2026-05-17T23:44:05Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"0e3d389fbecab223c8ac5d9dc19a326c58a6fd59450aaadbab32ad62fb4c2abd","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2019-06-05T16:56:54Z","title_canon_sha256":"979b325487ed886e67ce08950e503031d08b2b2bb6291f2c894c3b0fd8f5a01d"},"schema_version":"1.0","source":{"id":"1906.02138","kind":"arxiv","version":1}},"canonical_sha256":"c3e5cb18e737aa95021600cf12e04bc1c17fe596632a8be0361afb93337aa41d","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"c3e5cb18e737aa95021600cf12e04bc1c17fe596632a8be0361afb93337aa41d","first_computed_at":"2026-05-17T23:44:05.188203Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:44:05.188203Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"Mv/D0cZQgbzsOk1GVXGPFj47XkP9U2gb0DZmERMokqbKlTqvfbn55uYFXSUN/Qh0weDnSj9Nc3Rz1169Zg7jCg==","signature_status":"signed_v1","signed_at":"2026-05-17T23:44:05.188642Z","signed_message":"canonical_sha256_bytes"},"source_id":"1906.02138","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:9ca23d7b13e089da69c2170e9287aac14943256739fe533a4033257b9e86b3ce","sha256:98240088f97e93d13e9b8d42338deb0ae4df68c903a52f87bd7a1841bc665459"],"state_sha256":"8c072cbcc3621b0e52429ee6ac20a6c80d9ee3fbb12deb112c49858057529674"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"PK1ikYxvFfVhojHty/TDO/Clb2dB3jXeyWS+FJP4DCNzVkr3lJBfEQdzrmqtsD+9+u4jMNQtiv1BVfn7GDDQDA==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-23T11:35:12.299055Z","bundle_sha256":"805c82170f09454225685f5d610d12cfe8d9cb4b9d94b6d6c7fea5a208476918"}}