{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2018:MIDNW3MVVF2VDQW7Z4X7JY65MY","short_pith_number":"pith:MIDNW3MV","schema_version":"1.0","canonical_sha256":"6206db6d95a97551c2dfcf2ff4e3dd66159f4cdcaf0c33e5614e170e9edc2302","source":{"kind":"arxiv","id":"1810.08647","version":4},"attestation_state":"computed","paper":{"title":"Social Influence as Intrinsic Motivation for Multi-Agent Deep Reinforcement Learning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","cs.MA","stat.ML"],"primary_cat":"cs.LG","authors_text":"Angeliki Lazaridou, Caglar Gulcehre, DJ Strouse, Edward Hughes, Joel Z. Leibo, Nando de Freitas, Natasha Jaques, Pedro A. Ortega","submitted_at":"2018-10-19T19:01:15Z","abstract_excerpt":"We propose a unified mechanism for achieving coordination and communication in Multi-Agent Reinforcement Learning (MARL), through rewarding agents for having causal influence over other agents' actions. Causal influence is assessed using counterfactual reasoning. At each timestep, an agent simulates alternate actions that it could have taken, and computes their effect on the behavior of other agents. Actions that lead to bigger changes in other agents' behavior are considered influential and are rewarded. We show that this is equivalent to rewarding agents for having high mutual information be"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1810.08647","kind":"arxiv","version":4},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-10-19T19:01:15Z","cross_cats_sorted":["cs.AI","cs.MA","stat.ML"],"title_canon_sha256":"7a81cff3493c9f525d44af498b7d6eca7c9fb25839add9126c44210dc75c38df","abstract_canon_sha256":"5e66c0062d461cf61a831717c90c03c3850ac88e50d97a896c67794ee7f6f4c5"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:42:58.712947Z","signature_b64":"BNYNOlDmMPiNbqYHllqNed/zOMaX83WiQBRPfzc6Td5KYeqXmOkNMHYcnVz0gE/PMrLNVLCROCNGtVuqVj8FBA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"6206db6d95a97551c2dfcf2ff4e3dd66159f4cdcaf0c33e5614e170e9edc2302","last_reissued_at":"2026-05-17T23:42:58.712335Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:42:58.712335Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Social Influence as Intrinsic Motivation for Multi-Agent Deep Reinforcement Learning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","cs.MA","stat.ML"],"primary_cat":"cs.LG","authors_text":"Angeliki Lazaridou, Caglar Gulcehre, DJ Strouse, Edward Hughes, Joel Z. Leibo, Nando de Freitas, Natasha Jaques, Pedro A. Ortega","submitted_at":"2018-10-19T19:01:15Z","abstract_excerpt":"We propose a unified mechanism for achieving coordination and communication in Multi-Agent Reinforcement Learning (MARL), through rewarding agents for having causal influence over other agents' actions. Causal influence is assessed using counterfactual reasoning. At each timestep, an agent simulates alternate actions that it could have taken, and computes their effect on the behavior of other agents. Actions that lead to bigger changes in other agents' behavior are considered influential and are rewarded. We show that this is equivalent to rewarding agents for having high mutual information be"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1810.08647","kind":"arxiv","version":4},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1810.08647","created_at":"2026-05-17T23:42:58.712427+00:00"},{"alias_kind":"arxiv_version","alias_value":"1810.08647v4","created_at":"2026-05-17T23:42:58.712427+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1810.08647","created_at":"2026-05-17T23:42:58.712427+00:00"},{"alias_kind":"pith_short_12","alias_value":"MIDNW3MVVF2V","created_at":"2026-05-18T12:32:37.024351+00:00"},{"alias_kind":"pith_short_16","alias_value":"MIDNW3MVVF2VDQW7","created_at":"2026-05-18T12:32:37.024351+00:00"},{"alias_kind":"pith_short_8","alias_value":"MIDNW3MV","created_at":"2026-05-18T12:32:37.024351+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/MIDNW3MVVF2VDQW7Z4X7JY65MY","json":"https://pith.science/pith/MIDNW3MVVF2VDQW7Z4X7JY65MY.json","graph_json":"https://pith.science/api/pith-number/MIDNW3MVVF2VDQW7Z4X7JY65MY/graph.json","events_json":"https://pith.science/api/pith-number/MIDNW3MVVF2VDQW7Z4X7JY65MY/events.json","paper":"https://pith.science/paper/MIDNW3MV"},"agent_actions":{"view_html":"https://pith.science/pith/MIDNW3MVVF2VDQW7Z4X7JY65MY","download_json":"https://pith.science/pith/MIDNW3MVVF2VDQW7Z4X7JY65MY.json","view_paper":"https://pith.science/paper/MIDNW3MV","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1810.08647&json=true","fetch_graph":"https://pith.science/api/pith-number/MIDNW3MVVF2VDQW7Z4X7JY65MY/graph.json","fetch_events":"https://pith.science/api/pith-number/MIDNW3MVVF2VDQW7Z4X7JY65MY/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/MIDNW3MVVF2VDQW7Z4X7JY65MY/action/timestamp_anchor","attest_storage":"https://pith.science/pith/MIDNW3MVVF2VDQW7Z4X7JY65MY/action/storage_attestation","attest_author":"https://pith.science/pith/MIDNW3MVVF2VDQW7Z4X7JY65MY/action/author_attestation","sign_citation":"https://pith.science/pith/MIDNW3MVVF2VDQW7Z4X7JY65MY/action/citation_signature","submit_replication":"https://pith.science/pith/MIDNW3MVVF2VDQW7Z4X7JY65MY/action/replication_record"}},"created_at":"2026-05-17T23:42:58.712427+00:00","updated_at":"2026-05-17T23:42:58.712427+00:00"}