{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2024:2MOLGTR7DO6TBVAZRGIKNI65S5","short_pith_number":"pith:2MOLGTR7","schema_version":"1.0","canonical_sha256":"d31cb34e3f1bbd30d4198990a6a3dd97445b4e309fd8663ab9f0cab51bab1f0b","source":{"kind":"arxiv","id":"2409.01447","version":3},"attestation_state":"computed","paper":{"title":"Decentralized Best-Response-Based Learning in Two-Player Zero-Sum Stochastic Games: A Finite-Sample Analysis","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.GT"],"primary_cat":"cs.LG","authors_text":"Adam Wierman, Asuman Ozdaglar, Eric Mazumdar, Kaiqing Zhang, Zaiwei Chen","submitted_at":"2024-09-02T20:07:25Z","abstract_excerpt":"We present a finite-sample analysis of decentralized learning in two-player zero-sum matrix games and stochastic games, with a focus on best-response-based learning algorithms. In matrix games, the learning algorithm is payoff-based and symmetric: each player updates its policy using only its own payoff observations, incrementally moving toward an estimated smoothed best response to the opponent's latest policy. For stochastic games, we build on this matrix-game primitive to develop a learning algorithm called value iteration with smoothed best response (VI-SBR), which combines smoothed-best-r"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2409.01447","kind":"arxiv","version":3},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2024-09-02T20:07:25Z","cross_cats_sorted":["cs.GT"],"title_canon_sha256":"9106acd78f7819b6e7adee621d32daa40beb3a0f97335331ce4ed0ebe6e349bf","abstract_canon_sha256":"8a093075e4ff0ec6160d1cde3f1a7b625819448906af04cf83c781b3df60d468"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-26T01:15:12.784234Z","signature_b64":"mkr4tEO0wwdStnoyUMALB7qbSdillFa1kkHAms9J7w4vRblmS1GFwArYy9n1cgg2dMiw8AP/EEhDjGi+26QlDg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"d31cb34e3f1bbd30d4198990a6a3dd97445b4e309fd8663ab9f0cab51bab1f0b","last_reissued_at":"2026-06-26T01:15:12.783738Z","signature_status":"signed_v1","first_computed_at":"2026-06-26T01:15:12.783738Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Decentralized Best-Response-Based Learning in Two-Player Zero-Sum Stochastic Games: A Finite-Sample Analysis","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.GT"],"primary_cat":"cs.LG","authors_text":"Adam Wierman, Asuman Ozdaglar, Eric Mazumdar, Kaiqing Zhang, Zaiwei Chen","submitted_at":"2024-09-02T20:07:25Z","abstract_excerpt":"We present a finite-sample analysis of decentralized learning in two-player zero-sum matrix games and stochastic games, with a focus on best-response-based learning algorithms. In matrix games, the learning algorithm is payoff-based and symmetric: each player updates its policy using only its own payoff observations, incrementally moving toward an estimated smoothed best response to the opponent's latest policy. For stochastic games, we build on this matrix-game primitive to develop a learning algorithm called value iteration with smoothed best response (VI-SBR), which combines smoothed-best-r"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2409.01447","kind":"arxiv","version":3},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2409.01447/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2409.01447","created_at":"2026-06-26T01:15:12.783797+00:00"},{"alias_kind":"arxiv_version","alias_value":"2409.01447v3","created_at":"2026-06-26T01:15:12.783797+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2409.01447","created_at":"2026-06-26T01:15:12.783797+00:00"},{"alias_kind":"pith_short_12","alias_value":"2MOLGTR7DO6T","created_at":"2026-06-26T01:15:12.783797+00:00"},{"alias_kind":"pith_short_16","alias_value":"2MOLGTR7DO6TBVAZ","created_at":"2026-06-26T01:15:12.783797+00:00"},{"alias_kind":"pith_short_8","alias_value":"2MOLGTR7","created_at":"2026-06-26T01:15:12.783797+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":1,"internal_anchor_count":1,"sample":[{"citing_arxiv_id":"2605.09363","citing_title":"Near-Optimal Last-Iterate Convergence for Zero-Sum Games with Bandit Feedback and Opponent Actions","ref_index":233,"is_internal_anchor":true}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/2MOLGTR7DO6TBVAZRGIKNI65S5","json":"https://pith.science/pith/2MOLGTR7DO6TBVAZRGIKNI65S5.json","graph_json":"https://pith.science/api/pith-number/2MOLGTR7DO6TBVAZRGIKNI65S5/graph.json","events_json":"https://pith.science/api/pith-number/2MOLGTR7DO6TBVAZRGIKNI65S5/events.json","paper":"https://pith.science/paper/2MOLGTR7"},"agent_actions":{"view_html":"https://pith.science/pith/2MOLGTR7DO6TBVAZRGIKNI65S5","download_json":"https://pith.science/pith/2MOLGTR7DO6TBVAZRGIKNI65S5.json","view_paper":"https://pith.science/paper/2MOLGTR7","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2409.01447&json=true","fetch_graph":"https://pith.science/api/pith-number/2MOLGTR7DO6TBVAZRGIKNI65S5/graph.json","fetch_events":"https://pith.science/api/pith-number/2MOLGTR7DO6TBVAZRGIKNI65S5/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/2MOLGTR7DO6TBVAZRGIKNI65S5/action/timestamp_anchor","attest_storage":"https://pith.science/pith/2MOLGTR7DO6TBVAZRGIKNI65S5/action/storage_attestation","attest_author":"https://pith.science/pith/2MOLGTR7DO6TBVAZRGIKNI65S5/action/author_attestation","sign_citation":"https://pith.science/pith/2MOLGTR7DO6TBVAZRGIKNI65S5/action/citation_signature","submit_replication":"https://pith.science/pith/2MOLGTR7DO6TBVAZRGIKNI65S5/action/replication_record"}},"created_at":"2026-06-26T01:15:12.783797+00:00","updated_at":"2026-06-26T01:15:12.783797+00:00"}