{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2018:IRNAPEMI7NABQBZ3XYPIHLFIVQ","short_pith_number":"pith:IRNAPEMI","schema_version":"1.0","canonical_sha256":"445a079188fb4018073bbe1e83aca8ac15e51125d34112d77f0e27bf877eacc5","source":{"kind":"arxiv","id":"1812.07019","version":2},"attestation_state":"computed","paper":{"title":"Malthusian Reinforcement Learning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.MA","q-bio.PE"],"primary_cat":"cs.NE","authors_text":"Adam H. Marblestone, Edgar Du\\'e\\~nez-Guzm\\'an, Edward Hughes, Iain Dunning, Joel Z. Leibo, Julien Perolat, Peter Sunehag, Steven Wheelwright, Thore Graepel","submitted_at":"2018-12-17T19:36:14Z","abstract_excerpt":"Here we explore a new algorithmic framework for multi-agent reinforcement learning, called Malthusian reinforcement learning, which extends self-play to include fitness-linked population size dynamics that drive ongoing innovation. In Malthusian RL, increases in a subpopulation's average return drive subsequent increases in its size, just as Thomas Malthus argued in 1798 was the relationship between preindustrial income levels and population growth. Malthusian reinforcement learning harnesses the competitive pressures arising from growing and shrinking population size to drive agents to explor"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1812.07019","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.NE","submitted_at":"2018-12-17T19:36:14Z","cross_cats_sorted":["cs.MA","q-bio.PE"],"title_canon_sha256":"409475a60ac8326a2612107de7eabbcab58add29841590221aa3cb0881e80efe","abstract_canon_sha256":"24521b15d00bf00165988bbde3d87f24cc0976bb8b8002971dc49a12f900b7ed"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:52:15.098186Z","signature_b64":"T36/zcoOB/qQ2P7xYyeCbVO15R7hUZZYOYJyJCMOP5RHfFJClLXGL0/3HA+AHFzTB6Q45BrMGfkDc/gs6X7gAQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"445a079188fb4018073bbe1e83aca8ac15e51125d34112d77f0e27bf877eacc5","last_reissued_at":"2026-05-17T23:52:15.097618Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:52:15.097618Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Malthusian Reinforcement Learning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.MA","q-bio.PE"],"primary_cat":"cs.NE","authors_text":"Adam H. Marblestone, Edgar Du\\'e\\~nez-Guzm\\'an, Edward Hughes, Iain Dunning, Joel Z. Leibo, Julien Perolat, Peter Sunehag, Steven Wheelwright, Thore Graepel","submitted_at":"2018-12-17T19:36:14Z","abstract_excerpt":"Here we explore a new algorithmic framework for multi-agent reinforcement learning, called Malthusian reinforcement learning, which extends self-play to include fitness-linked population size dynamics that drive ongoing innovation. In Malthusian RL, increases in a subpopulation's average return drive subsequent increases in its size, just as Thomas Malthus argued in 1798 was the relationship between preindustrial income levels and population growth. Malthusian reinforcement learning harnesses the competitive pressures arising from growing and shrinking population size to drive agents to explor"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1812.07019","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1812.07019","created_at":"2026-05-17T23:52:15.097726+00:00"},{"alias_kind":"arxiv_version","alias_value":"1812.07019v2","created_at":"2026-05-17T23:52:15.097726+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1812.07019","created_at":"2026-05-17T23:52:15.097726+00:00"},{"alias_kind":"pith_short_12","alias_value":"IRNAPEMI7NAB","created_at":"2026-05-18T12:32:31.084164+00:00"},{"alias_kind":"pith_short_16","alias_value":"IRNAPEMI7NABQBZ3","created_at":"2026-05-18T12:32:31.084164+00:00"},{"alias_kind":"pith_short_8","alias_value":"IRNAPEMI","created_at":"2026-05-18T12:32:31.084164+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/IRNAPEMI7NABQBZ3XYPIHLFIVQ","json":"https://pith.science/pith/IRNAPEMI7NABQBZ3XYPIHLFIVQ.json","graph_json":"https://pith.science/api/pith-number/IRNAPEMI7NABQBZ3XYPIHLFIVQ/graph.json","events_json":"https://pith.science/api/pith-number/IRNAPEMI7NABQBZ3XYPIHLFIVQ/events.json","paper":"https://pith.science/paper/IRNAPEMI"},"agent_actions":{"view_html":"https://pith.science/pith/IRNAPEMI7NABQBZ3XYPIHLFIVQ","download_json":"https://pith.science/pith/IRNAPEMI7NABQBZ3XYPIHLFIVQ.json","view_paper":"https://pith.science/paper/IRNAPEMI","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1812.07019&json=true","fetch_graph":"https://pith.science/api/pith-number/IRNAPEMI7NABQBZ3XYPIHLFIVQ/graph.json","fetch_events":"https://pith.science/api/pith-number/IRNAPEMI7NABQBZ3XYPIHLFIVQ/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/IRNAPEMI7NABQBZ3XYPIHLFIVQ/action/timestamp_anchor","attest_storage":"https://pith.science/pith/IRNAPEMI7NABQBZ3XYPIHLFIVQ/action/storage_attestation","attest_author":"https://pith.science/pith/IRNAPEMI7NABQBZ3XYPIHLFIVQ/action/author_attestation","sign_citation":"https://pith.science/pith/IRNAPEMI7NABQBZ3XYPIHLFIVQ/action/citation_signature","submit_replication":"https://pith.science/pith/IRNAPEMI7NABQBZ3XYPIHLFIVQ/action/replication_record"}},"created_at":"2026-05-17T23:52:15.097726+00:00","updated_at":"2026-05-17T23:52:15.097726+00:00"}