{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:ROILB5ES3TWZIBUSUZMHW6NCFJ","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"ee83d677f39b5747e015712eed0f440ce315e30f3ef3859b0cd767bf729b0801","cross_cats_sorted":["cs.GT"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-05-19T01:07:39Z","title_canon_sha256":"fe22aefd17cf0eeda11b83d86e603c0676c1b907d7e259c2cf5e2e92cb77f207"},"schema_version":"1.0","source":{"id":"2605.19235","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.19235","created_at":"2026-05-20T01:05:34Z"},{"alias_kind":"arxiv_version","alias_value":"2605.19235v1","created_at":"2026-05-20T01:05:34Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.19235","created_at":"2026-05-20T01:05:34Z"},{"alias_kind":"pith_short_12","alias_value":"ROILB5ES3TWZ","created_at":"2026-05-20T01:05:34Z"},{"alias_kind":"pith_short_16","alias_value":"ROILB5ES3TWZIBUS","created_at":"2026-05-20T01:05:34Z"},{"alias_kind":"pith_short_8","alias_value":"ROILB5ES","created_at":"2026-05-20T01:05:34Z"}],"graph_snapshots":[{"event_id":"sha256:54fa6aefd4f5c6ee45d89e63a14feaebb46110dcdd62cb09d6f314f8dfaea905","target":"graph","created_at":"2026-05-20T01:05:34Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2605.19235/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Competitive multi-agent reinforcement learning in imperfect-information games requires agents to act under partial observability and against adversarial opponents, necessitating stochastic policies. While self-play reinforcement learning with Proximal Policy Optimization (PPO) has achieved strong empirical success, its standard advantage estimator, generalized advantage estimation, suffers from additional variance due to the sampling of stochastic future actions. This variance is amplified in equilibrium self-play because of the stochastic nature of the equilibrium policy and persists even whe","authors_text":"Gabriele Farina, Zhiyuan Fan","cross_cats":["cs.GT"],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-05-19T01:07:39Z","title":"GAE Falls Short in Imperfect-Information Self-Play Reinforcement Learning"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.19235","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:ec50c1f7bbad6cbd00f7b1c4d7ab4015223f3de9007a5747d04f5d344394fef9","target":"record","created_at":"2026-05-20T01:05:34Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"ee83d677f39b5747e015712eed0f440ce315e30f3ef3859b0cd767bf729b0801","cross_cats_sorted":["cs.GT"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-05-19T01:07:39Z","title_canon_sha256":"fe22aefd17cf0eeda11b83d86e603c0676c1b907d7e259c2cf5e2e92cb77f207"},"schema_version":"1.0","source":{"id":"2605.19235","kind":"arxiv","version":1}},"canonical_sha256":"8b90b0f492dced940692a6587b79a22a4405fc195f1856ae395747144659bc68","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"8b90b0f492dced940692a6587b79a22a4405fc195f1856ae395747144659bc68","first_computed_at":"2026-05-20T01:05:34.837479Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-20T01:05:34.837479Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"RxRR86avSlbnf1nC/WX/xa5890Ng8CPwKPzSdq6NHiAu0HlfLzVoO26ZmyloSzMRDGLKDtECTixQr2fB8QjHCQ==","signature_status":"signed_v1","signed_at":"2026-05-20T01:05:34.838169Z","signed_message":"canonical_sha256_bytes"},"source_id":"2605.19235","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:ec50c1f7bbad6cbd00f7b1c4d7ab4015223f3de9007a5747d04f5d344394fef9","sha256:54fa6aefd4f5c6ee45d89e63a14feaebb46110dcdd62cb09d6f314f8dfaea905"],"state_sha256":"9b971b8fa68ea728c487d324dac84524c7d35ff7394a43da80d8b4a0a3fd5c26"}