{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2018:RNJP5ILZIEHKDEXFKIQFDSX5WC","short_pith_number":"pith:RNJP5ILZ","canonical_record":{"source":{"id":"1807.09427","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2018-07-25T03:56:04Z","cross_cats_sorted":["cs.LG","stat.ML"],"title_canon_sha256":"24e2816786041182c76a23e3816b98ff7b357b1f10d89a913235bb45f606c6ca","abstract_canon_sha256":"4abce1e5d5fd5c3de206fde809037ba1a60eea5ea0a73e3193f8a78bae1f46b6"},"schema_version":"1.0"},"canonical_sha256":"8b52fea179410ea192e5522051cafdb09aa33bd41eabb910e8d7445daac923b1","source":{"kind":"arxiv","id":"1807.09427","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1807.09427","created_at":"2026-05-18T00:09:51Z"},{"alias_kind":"arxiv_version","alias_value":"1807.09427v1","created_at":"2026-05-18T00:09:51Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1807.09427","created_at":"2026-05-18T00:09:51Z"},{"alias_kind":"pith_short_12","alias_value":"RNJP5ILZIEHK","created_at":"2026-05-18T12:32:50Z"},{"alias_kind":"pith_short_16","alias_value":"RNJP5ILZIEHKDEXF","created_at":"2026-05-18T12:32:50Z"},{"alias_kind":"pith_short_8","alias_value":"RNJP5ILZ","created_at":"2026-05-18T12:32:50Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2018:RNJP5ILZIEHKDEXFKIQFDSX5WC","target":"record","payload":{"canonical_record":{"source":{"id":"1807.09427","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2018-07-25T03:56:04Z","cross_cats_sorted":["cs.LG","stat.ML"],"title_canon_sha256":"24e2816786041182c76a23e3816b98ff7b357b1f10d89a913235bb45f606c6ca","abstract_canon_sha256":"4abce1e5d5fd5c3de206fde809037ba1a60eea5ea0a73e3193f8a78bae1f46b6"},"schema_version":"1.0"},"canonical_sha256":"8b52fea179410ea192e5522051cafdb09aa33bd41eabb910e8d7445daac923b1","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:09:51.361894Z","signature_b64":"VfB5dxuz7udhdBQDV14UiQ2eZSafWKSTaJQiWIfqVyxPEjVTvzOZ892yKnlJrm/z1Jr97KWJTyraTXgCP8h7DQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"8b52fea179410ea192e5522051cafdb09aa33bd41eabb910e8d7445daac923b1","last_reissued_at":"2026-05-18T00:09:51.361197Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:09:51.361197Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1807.09427","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:09:51Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"CkXepLH251DDb7D4uP40iSfV9INCamnBnZcAdU0EcpTAooALdPEZivgq7EflvAiobKW5pujmuScKK9Dv/QASCg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-28T23:38:37.822580Z"},"content_sha256":"d0cbea19cd94f0aad9a42a6ced399eaf08a3ae41d182ab1597cca9dcf189c725","schema_version":"1.0","event_id":"sha256:d0cbea19cd94f0aad9a42a6ced399eaf08a3ae41d182ab1597cca9dcf189c725"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2018:RNJP5ILZIEHKDEXFKIQFDSX5WC","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Multi-Agent Reinforcement Learning: A Report on Challenges and Approaches","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.LG","stat.ML"],"primary_cat":"cs.AI","authors_text":"Sanyam Kapoor","submitted_at":"2018-07-25T03:56:04Z","abstract_excerpt":"Reinforcement Learning (RL) is a learning paradigm concerned with learning to control a system so as to maximize an objective over the long term. This approach to learning has received immense interest in recent times and success manifests itself in the form of human-level performance on games like \\textit{Go}. While RL is emerging as a practical component in real-life systems, most successes have been in Single Agent domains. This report will instead specifically focus on challenges that are unique to Multi-Agent Systems interacting in mixed cooperative and competitive environments. The repor"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1807.09427","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:09:51Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"2yGRXkc8W2K3CTppaT3DNo8Xm7nB7wiIYiVNfFA42T2tWEbvLSWnZDR/BXIwvK/fsYeR6pJWfAakKi3QCyslBg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-28T23:38:37.823211Z"},"content_sha256":"2afa7d566a00d7e6c47ae6df9557cb7738ad7cba1f1266fd9910c3347e2bdc66","schema_version":"1.0","event_id":"sha256:2afa7d566a00d7e6c47ae6df9557cb7738ad7cba1f1266fd9910c3347e2bdc66"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/RNJP5ILZIEHKDEXFKIQFDSX5WC/bundle.json","state_url":"https://pith.science/pith/RNJP5ILZIEHKDEXFKIQFDSX5WC/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/RNJP5ILZIEHKDEXFKIQFDSX5WC/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-28T23:38:37Z","links":{"resolver":"https://pith.science/pith/RNJP5ILZIEHKDEXFKIQFDSX5WC","bundle":"https://pith.science/pith/RNJP5ILZIEHKDEXFKIQFDSX5WC/bundle.json","state":"https://pith.science/pith/RNJP5ILZIEHKDEXFKIQFDSX5WC/state.json","well_known_bundle":"https://pith.science/.well-known/pith/RNJP5ILZIEHKDEXFKIQFDSX5WC/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2018:RNJP5ILZIEHKDEXFKIQFDSX5WC","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"4abce1e5d5fd5c3de206fde809037ba1a60eea5ea0a73e3193f8a78bae1f46b6","cross_cats_sorted":["cs.LG","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2018-07-25T03:56:04Z","title_canon_sha256":"24e2816786041182c76a23e3816b98ff7b357b1f10d89a913235bb45f606c6ca"},"schema_version":"1.0","source":{"id":"1807.09427","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1807.09427","created_at":"2026-05-18T00:09:51Z"},{"alias_kind":"arxiv_version","alias_value":"1807.09427v1","created_at":"2026-05-18T00:09:51Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1807.09427","created_at":"2026-05-18T00:09:51Z"},{"alias_kind":"pith_short_12","alias_value":"RNJP5ILZIEHK","created_at":"2026-05-18T12:32:50Z"},{"alias_kind":"pith_short_16","alias_value":"RNJP5ILZIEHKDEXF","created_at":"2026-05-18T12:32:50Z"},{"alias_kind":"pith_short_8","alias_value":"RNJP5ILZ","created_at":"2026-05-18T12:32:50Z"}],"graph_snapshots":[{"event_id":"sha256:2afa7d566a00d7e6c47ae6df9557cb7738ad7cba1f1266fd9910c3347e2bdc66","target":"graph","created_at":"2026-05-18T00:09:51Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Reinforcement Learning (RL) is a learning paradigm concerned with learning to control a system so as to maximize an objective over the long term. This approach to learning has received immense interest in recent times and success manifests itself in the form of human-level performance on games like \\textit{Go}. While RL is emerging as a practical component in real-life systems, most successes have been in Single Agent domains. This report will instead specifically focus on challenges that are unique to Multi-Agent Systems interacting in mixed cooperative and competitive environments. The repor","authors_text":"Sanyam Kapoor","cross_cats":["cs.LG","stat.ML"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2018-07-25T03:56:04Z","title":"Multi-Agent Reinforcement Learning: A Report on Challenges and Approaches"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1807.09427","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:d0cbea19cd94f0aad9a42a6ced399eaf08a3ae41d182ab1597cca9dcf189c725","target":"record","created_at":"2026-05-18T00:09:51Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"4abce1e5d5fd5c3de206fde809037ba1a60eea5ea0a73e3193f8a78bae1f46b6","cross_cats_sorted":["cs.LG","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2018-07-25T03:56:04Z","title_canon_sha256":"24e2816786041182c76a23e3816b98ff7b357b1f10d89a913235bb45f606c6ca"},"schema_version":"1.0","source":{"id":"1807.09427","kind":"arxiv","version":1}},"canonical_sha256":"8b52fea179410ea192e5522051cafdb09aa33bd41eabb910e8d7445daac923b1","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"8b52fea179410ea192e5522051cafdb09aa33bd41eabb910e8d7445daac923b1","first_computed_at":"2026-05-18T00:09:51.361197Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:09:51.361197Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"VfB5dxuz7udhdBQDV14UiQ2eZSafWKSTaJQiWIfqVyxPEjVTvzOZ892yKnlJrm/z1Jr97KWJTyraTXgCP8h7DQ==","signature_status":"signed_v1","signed_at":"2026-05-18T00:09:51.361894Z","signed_message":"canonical_sha256_bytes"},"source_id":"1807.09427","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:d0cbea19cd94f0aad9a42a6ced399eaf08a3ae41d182ab1597cca9dcf189c725","sha256:2afa7d566a00d7e6c47ae6df9557cb7738ad7cba1f1266fd9910c3347e2bdc66"],"state_sha256":"37620a4b104de0f0857798bb996e4fc86ae2083f33d20f43567024b9c41fa109"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"6+CjpS8tF5LJ+O8DwRjYMaU8KIbRn4kbyvMA0hZ1v2pkwmLWshG4s95n6lL9ok8LtcCINOFjDvElP/+jj5iNCg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-28T23:38:37.826631Z","bundle_sha256":"513e4702a8bab5ee44578b3699a5d8b7c625c6104128901b9c4375aa0a0117bf"}}