{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2018:ZVDVGKYBYOE6M33N2A2Q2ISIXA","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"78e825731617b19975c7452b01823814cbf93eb9b1251f1c688d2d81299f59be","cross_cats_sorted":["cs.LG","cs.MA","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2018-09-29T04:33:15Z","title_canon_sha256":"1f76c5dc09a57f361a7adadaa012dd0d1432a90ec753055f70f8c92389b0087b"},"schema_version":"1.0","source":{"id":"1810.00147","kind":"arxiv","version":3}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1810.00147","created_at":"2026-05-17T23:51:52Z"},{"alias_kind":"arxiv_version","alias_value":"1810.00147v3","created_at":"2026-05-17T23:51:52Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1810.00147","created_at":"2026-05-17T23:51:52Z"},{"alias_kind":"pith_short_12","alias_value":"ZVDVGKYBYOE6","created_at":"2026-05-18T12:33:07Z"},{"alias_kind":"pith_short_16","alias_value":"ZVDVGKYBYOE6M33N","created_at":"2026-05-18T12:33:07Z"},{"alias_kind":"pith_short_8","alias_value":"ZVDVGKYB","created_at":"2026-05-18T12:33:07Z"}],"graph_snapshots":[{"event_id":"sha256:6294e07447d75fc038cb3bc39b9507cdeeb4190c3ae79acc7f9c3d3c6c678b26","target":"graph","created_at":"2026-05-17T23:51:52Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Most of the prior work on multi-agent reinforcement learning (MARL) achieves optimal collaboration by directly controlling the agents to maximize a common reward. In this paper, we aim to address this from a different angle. In particular, we consider scenarios where there are self-interested agents (i.e., worker agents) which have their own minds (preferences, intentions, skills, etc.) and can not be dictated to perform tasks they do not wish to do. For achieving optimal coordination among these agents, we train a super agent (i.e., the manager) to manage them by first inferring their minds b","authors_text":"Tianmin Shu, Yuandong Tian","cross_cats":["cs.LG","cs.MA","stat.ML"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2018-09-29T04:33:15Z","title":"M$^3$RL: Mind-aware Multi-agent Management Reinforcement Learning"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1810.00147","kind":"arxiv","version":3},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:e220994d71d371e5dbf61f8b62507dab9c159d6455f787d161baedb3c094eba4","target":"record","created_at":"2026-05-17T23:51:52Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"78e825731617b19975c7452b01823814cbf93eb9b1251f1c688d2d81299f59be","cross_cats_sorted":["cs.LG","cs.MA","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2018-09-29T04:33:15Z","title_canon_sha256":"1f76c5dc09a57f361a7adadaa012dd0d1432a90ec753055f70f8c92389b0087b"},"schema_version":"1.0","source":{"id":"1810.00147","kind":"arxiv","version":3}},"canonical_sha256":"cd47532b01c389e66f6dd0350d2248b811498bf9057f8de1ae1b6162c9814d82","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"cd47532b01c389e66f6dd0350d2248b811498bf9057f8de1ae1b6162c9814d82","first_computed_at":"2026-05-17T23:51:52.499975Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:51:52.499975Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"8Q1yae/QVJyK9KTj/LENxDQ4+nuP7sbD+/axdNKDq6k0pwcN/FA25Rv3H0oXO2a8+ZNkUr9oEc8AYqG8MjUqCw==","signature_status":"signed_v1","signed_at":"2026-05-17T23:51:52.500698Z","signed_message":"canonical_sha256_bytes"},"source_id":"1810.00147","source_kind":"arxiv","source_version":3}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:e220994d71d371e5dbf61f8b62507dab9c159d6455f787d161baedb3c094eba4","sha256:6294e07447d75fc038cb3bc39b9507cdeeb4190c3ae79acc7f9c3d3c6c678b26"],"state_sha256":"613b2ce5d68231be3ef2fc444aa4e7f96800fee7d0da1efdcafeff9f2b538e28"}