{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2014:VHDXVNSDC2SL633EMN7B4JF35Q","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"e930d6abe27e72904f05922ae3dda67cd0dd7acec02c15888010570c36eaced5","cross_cats_sorted":["cs.MA"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2014-01-15T05:13:47Z","title_canon_sha256":"356a1b8aaed48def37cbc2b1c91b2f2d422f1d3bb7e881d70235eae2c6dd5faa"},"schema_version":"1.0","source":{"id":"1401.3454","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1401.3454","created_at":"2026-05-18T03:02:09Z"},{"alias_kind":"arxiv_version","alias_value":"1401.3454v1","created_at":"2026-05-18T03:02:09Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1401.3454","created_at":"2026-05-18T03:02:09Z"},{"alias_kind":"pith_short_12","alias_value":"VHDXVNSDC2SL","created_at":"2026-05-18T12:28:54Z"},{"alias_kind":"pith_short_16","alias_value":"VHDXVNSDC2SL633E","created_at":"2026-05-18T12:28:54Z"},{"alias_kind":"pith_short_8","alias_value":"VHDXVNSD","created_at":"2026-05-18T12:28:54Z"}],"graph_snapshots":[{"event_id":"sha256:000d3354ebe5458194c336fe14fbab6283d5523424920f2b4e5daf6245cb9bb0","target":"graph","created_at":"2026-05-18T03:02:09Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Several multiagent reinforcement learning (MARL) algorithms have been proposed to optimize agents decisions. Due to the complexity of the problem, the majority of the previously developed MARL algorithms assumed agents either had some knowledge of the underlying game (such as Nash equilibria) and/or observed other agents actions and the rewards they received.\n  We introduce a new MARL algorithm called the Weighted Policy Learner (WPL), which allows agents to reach a Nash Equilibrium (NE) in benchmark 2-player-2-action games with minimum knowledge. Using WPL, the only feedback an agent needs is","authors_text":"Sherief Abdallah, Victor Lesser","cross_cats":["cs.MA"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2014-01-15T05:13:47Z","title":"A Multiagent Reinforcement Learning Algorithm with Non-linear Dynamics"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1401.3454","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:6c661e6728922104142c3746eb391a6ac687d62f5d50ae216b713b61fd380814","target":"record","created_at":"2026-05-18T03:02:09Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"e930d6abe27e72904f05922ae3dda67cd0dd7acec02c15888010570c36eaced5","cross_cats_sorted":["cs.MA"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2014-01-15T05:13:47Z","title_canon_sha256":"356a1b8aaed48def37cbc2b1c91b2f2d422f1d3bb7e881d70235eae2c6dd5faa"},"schema_version":"1.0","source":{"id":"1401.3454","kind":"arxiv","version":1}},"canonical_sha256":"a9c77ab64316a4bf6f64637e1e24bbec0e0dd1b3d784110e79375364dbf3f886","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"a9c77ab64316a4bf6f64637e1e24bbec0e0dd1b3d784110e79375364dbf3f886","first_computed_at":"2026-05-18T03:02:09.015661Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T03:02:09.015661Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"MfH9WYx5WuHnC+eMoAQreAlP31jLU7CwlOF57J8CyihDxu7AMKbhSSSsYXWY9bo4zu7qkQaxyyQNBaRZ5A7sAg==","signature_status":"signed_v1","signed_at":"2026-05-18T03:02:09.016383Z","signed_message":"canonical_sha256_bytes"},"source_id":"1401.3454","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:6c661e6728922104142c3746eb391a6ac687d62f5d50ae216b713b61fd380814","sha256:000d3354ebe5458194c336fe14fbab6283d5523424920f2b4e5daf6245cb9bb0"],"state_sha256":"e32b9a97d5ea648cffbce3b6b53a6a3d7017659675167e8a7891e6e267455222"}