{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2018:JZ7HL5XUATFYZ722WTQ74ULFIF","short_pith_number":"pith:JZ7HL5XU","schema_version":"1.0","canonical_sha256":"4e7e75f6f404cb8cff5ab4e1fe5165415bc81550ac6568c34f6a8bb3606d2bea","source":{"kind":"arxiv","id":"1810.02912","version":2},"attestation_state":"computed","paper":{"title":"Actor-Attention-Critic for Multi-Agent Reinforcement Learning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","cs.MA","stat.ML"],"primary_cat":"cs.LG","authors_text":"Fei Sha, Shariq Iqbal","submitted_at":"2018-10-05T23:45:14Z","abstract_excerpt":"Reinforcement learning in multi-agent scenarios is important for real-world applications but presents challenges beyond those seen in single-agent settings. We present an actor-critic algorithm that trains decentralized policies in multi-agent settings, using centrally computed critics that share an attention mechanism which selects relevant information for each agent at every timestep. This attention mechanism enables more effective and scalable learning in complex multi-agent environments, when compared to recent approaches. Our approach is applicable not only to cooperative settings with sh"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1810.02912","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-10-05T23:45:14Z","cross_cats_sorted":["cs.AI","cs.MA","stat.ML"],"title_canon_sha256":"24dc353d56b4affc2f4b44d10f29f01f8ba5d3f9d3e037dbd14b706b7c12d627","abstract_canon_sha256":"8c0e8409d348c6ded4b83fd193a5caf7e2a3cea06e22c4c4b6b4a41f314c510b"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:44:58.885577Z","signature_b64":"d+AtYKr7YoNDMbPdZZWM8EXJMbIdeg3zxFS6Uspkjz1EW1LA9ru/bFErEZFe3BVvU6hCoV96fn2rFjf+vzn+AA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"4e7e75f6f404cb8cff5ab4e1fe5165415bc81550ac6568c34f6a8bb3606d2bea","last_reissued_at":"2026-05-17T23:44:58.884976Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:44:58.884976Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Actor-Attention-Critic for Multi-Agent Reinforcement Learning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","cs.MA","stat.ML"],"primary_cat":"cs.LG","authors_text":"Fei Sha, Shariq Iqbal","submitted_at":"2018-10-05T23:45:14Z","abstract_excerpt":"Reinforcement learning in multi-agent scenarios is important for real-world applications but presents challenges beyond those seen in single-agent settings. We present an actor-critic algorithm that trains decentralized policies in multi-agent settings, using centrally computed critics that share an attention mechanism which selects relevant information for each agent at every timestep. This attention mechanism enables more effective and scalable learning in complex multi-agent environments, when compared to recent approaches. Our approach is applicable not only to cooperative settings with sh"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1810.02912","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1810.02912","created_at":"2026-05-17T23:44:58.885050+00:00"},{"alias_kind":"arxiv_version","alias_value":"1810.02912v2","created_at":"2026-05-17T23:44:58.885050+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1810.02912","created_at":"2026-05-17T23:44:58.885050+00:00"},{"alias_kind":"pith_short_12","alias_value":"JZ7HL5XUATFY","created_at":"2026-05-18T12:32:33.847187+00:00"},{"alias_kind":"pith_short_16","alias_value":"JZ7HL5XUATFYZ722","created_at":"2026-05-18T12:32:33.847187+00:00"},{"alias_kind":"pith_short_8","alias_value":"JZ7HL5XU","created_at":"2026-05-18T12:32:33.847187+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":2,"internal_anchor_count":2,"sample":[{"citing_arxiv_id":"2502.00558","citing_title":"Asynchronous Cooperative Multi-Agent Reinforcement Learning with Limited Communication","ref_index":6,"is_internal_anchor":true},{"citing_arxiv_id":"2507.14995","citing_title":"LLM-Enhanced Multi-Agent Reinforcement Learning with Expert Workflow for Real-Time P2P Energy Trading","ref_index":32,"is_internal_anchor":true}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/JZ7HL5XUATFYZ722WTQ74ULFIF","json":"https://pith.science/pith/JZ7HL5XUATFYZ722WTQ74ULFIF.json","graph_json":"https://pith.science/api/pith-number/JZ7HL5XUATFYZ722WTQ74ULFIF/graph.json","events_json":"https://pith.science/api/pith-number/JZ7HL5XUATFYZ722WTQ74ULFIF/events.json","paper":"https://pith.science/paper/JZ7HL5XU"},"agent_actions":{"view_html":"https://pith.science/pith/JZ7HL5XUATFYZ722WTQ74ULFIF","download_json":"https://pith.science/pith/JZ7HL5XUATFYZ722WTQ74ULFIF.json","view_paper":"https://pith.science/paper/JZ7HL5XU","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1810.02912&json=true","fetch_graph":"https://pith.science/api/pith-number/JZ7HL5XUATFYZ722WTQ74ULFIF/graph.json","fetch_events":"https://pith.science/api/pith-number/JZ7HL5XUATFYZ722WTQ74ULFIF/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/JZ7HL5XUATFYZ722WTQ74ULFIF/action/timestamp_anchor","attest_storage":"https://pith.science/pith/JZ7HL5XUATFYZ722WTQ74ULFIF/action/storage_attestation","attest_author":"https://pith.science/pith/JZ7HL5XUATFYZ722WTQ74ULFIF/action/author_attestation","sign_citation":"https://pith.science/pith/JZ7HL5XUATFYZ722WTQ74ULFIF/action/citation_signature","submit_replication":"https://pith.science/pith/JZ7HL5XUATFYZ722WTQ74ULFIF/action/replication_record"}},"created_at":"2026-05-17T23:44:58.885050+00:00","updated_at":"2026-05-17T23:44:58.885050+00:00"}