{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2017:YJGTBYAMN4VYWM4V74R6CXBIIS","short_pith_number":"pith:YJGTBYAM","schema_version":"1.0","canonical_sha256":"c24d30e00c6f2b8b3395ff23e15c2844aa7c7d6d220b84a9a7490a6731817393","source":{"kind":"arxiv","id":"1702.03037","version":1},"attestation_state":"computed","paper":{"title":"Multi-agent Reinforcement Learning in Sequential Social Dilemmas","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","cs.GT","cs.LG"],"primary_cat":"cs.MA","authors_text":"Janusz Marecki, Joel Z. Leibo, Marc Lanctot, Thore Graepel, Vinicius Zambaldi","submitted_at":"2017-02-10T01:48:40Z","abstract_excerpt":"Matrix games like Prisoner's Dilemma have guided research on social dilemmas for decades. However, they necessarily treat the choice to cooperate or defect as an atomic action. In real-world social dilemmas these choices are temporally extended. Cooperativeness is a property that applies to policies, not elementary actions. We introduce sequential social dilemmas that share the mixed incentive structure of matrix game social dilemmas but also require agents to learn policies that implement their strategic intentions. We analyze the dynamics of policies learned by multiple self-interested indep"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1702.03037","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.MA","submitted_at":"2017-02-10T01:48:40Z","cross_cats_sorted":["cs.AI","cs.GT","cs.LG"],"title_canon_sha256":"2ecadf3802326ae5ee9560c2190da5821a4b98ed3abb57ba0153de4e44960408","abstract_canon_sha256":"1c86842dbc6377d61107a472d5c63350788c8fe66fd33ecfc85f219a31561755"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:50:58.812443Z","signature_b64":"DmlbXIYHFsAJyyPfNytc+Hytz0YIqMfyNUKYyO73sxO5GpqEH/2wiqpePiB/ix0kRv8PJWJBd9B2LN/uusE/Dg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"c24d30e00c6f2b8b3395ff23e15c2844aa7c7d6d220b84a9a7490a6731817393","last_reissued_at":"2026-05-18T00:50:58.811733Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:50:58.811733Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Multi-agent Reinforcement Learning in Sequential Social Dilemmas","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","cs.GT","cs.LG"],"primary_cat":"cs.MA","authors_text":"Janusz Marecki, Joel Z. Leibo, Marc Lanctot, Thore Graepel, Vinicius Zambaldi","submitted_at":"2017-02-10T01:48:40Z","abstract_excerpt":"Matrix games like Prisoner's Dilemma have guided research on social dilemmas for decades. However, they necessarily treat the choice to cooperate or defect as an atomic action. In real-world social dilemmas these choices are temporally extended. Cooperativeness is a property that applies to policies, not elementary actions. We introduce sequential social dilemmas that share the mixed incentive structure of matrix game social dilemmas but also require agents to learn policies that implement their strategic intentions. We analyze the dynamics of policies learned by multiple self-interested indep"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1702.03037","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1702.03037","created_at":"2026-05-18T00:50:58.811856+00:00"},{"alias_kind":"arxiv_version","alias_value":"1702.03037v1","created_at":"2026-05-18T00:50:58.811856+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1702.03037","created_at":"2026-05-18T00:50:58.811856+00:00"},{"alias_kind":"pith_short_12","alias_value":"YJGTBYAMN4VY","created_at":"2026-05-18T12:31:56.362134+00:00"},{"alias_kind":"pith_short_16","alias_value":"YJGTBYAMN4VYWM4V","created_at":"2026-05-18T12:31:56.362134+00:00"},{"alias_kind":"pith_short_8","alias_value":"YJGTBYAM","created_at":"2026-05-18T12:31:56.362134+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":5,"internal_anchor_count":2,"sample":[{"citing_arxiv_id":"2601.22292","citing_title":"Learning Incentive Structures for Cooperative Resilience in Multi-Agent Systems under Social Dilemmas","ref_index":8,"is_internal_anchor":true},{"citing_arxiv_id":"2605.20348","citing_title":"Memory-Induced Supra-Competitive Outcomes Between Deep Reinforcement Learning Agents in Optimal Trade Execution","ref_index":30,"is_internal_anchor":true},{"citing_arxiv_id":"2604.02674","citing_title":"Do Agent Societies Develop Intellectual Elites? The Hidden Power Laws of Collective Cognition in LLM Multi-Agent Systems","ref_index":31,"is_internal_anchor":false},{"citing_arxiv_id":"2604.03818","citing_title":"Investigating the Impact of Subgraph Social Structure Preference on the Strategic Behavior of Networked Mixed-Motive Learning Agents","ref_index":3,"is_internal_anchor":false},{"citing_arxiv_id":"2605.08323","citing_title":"The Reciprocity Gradient","ref_index":4,"is_internal_anchor":false}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/YJGTBYAMN4VYWM4V74R6CXBIIS","json":"https://pith.science/pith/YJGTBYAMN4VYWM4V74R6CXBIIS.json","graph_json":"https://pith.science/api/pith-number/YJGTBYAMN4VYWM4V74R6CXBIIS/graph.json","events_json":"https://pith.science/api/pith-number/YJGTBYAMN4VYWM4V74R6CXBIIS/events.json","paper":"https://pith.science/paper/YJGTBYAM"},"agent_actions":{"view_html":"https://pith.science/pith/YJGTBYAMN4VYWM4V74R6CXBIIS","download_json":"https://pith.science/pith/YJGTBYAMN4VYWM4V74R6CXBIIS.json","view_paper":"https://pith.science/paper/YJGTBYAM","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1702.03037&json=true","fetch_graph":"https://pith.science/api/pith-number/YJGTBYAMN4VYWM4V74R6CXBIIS/graph.json","fetch_events":"https://pith.science/api/pith-number/YJGTBYAMN4VYWM4V74R6CXBIIS/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/YJGTBYAMN4VYWM4V74R6CXBIIS/action/timestamp_anchor","attest_storage":"https://pith.science/pith/YJGTBYAMN4VYWM4V74R6CXBIIS/action/storage_attestation","attest_author":"https://pith.science/pith/YJGTBYAMN4VYWM4V74R6CXBIIS/action/author_attestation","sign_citation":"https://pith.science/pith/YJGTBYAMN4VYWM4V74R6CXBIIS/action/citation_signature","submit_replication":"https://pith.science/pith/YJGTBYAMN4VYWM4V74R6CXBIIS/action/replication_record"}},"created_at":"2026-05-18T00:50:58.811856+00:00","updated_at":"2026-05-18T00:50:58.811856+00:00"}