{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2024:2MOLGTR7DO6TBVAZRGIKNI65S5","short_pith_number":"pith:2MOLGTR7","canonical_record":{"source":{"id":"2409.01447","kind":"arxiv","version":3},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2024-09-02T20:07:25Z","cross_cats_sorted":["cs.GT"],"title_canon_sha256":"9106acd78f7819b6e7adee621d32daa40beb3a0f97335331ce4ed0ebe6e349bf","abstract_canon_sha256":"8a093075e4ff0ec6160d1cde3f1a7b625819448906af04cf83c781b3df60d468"},"schema_version":"1.0"},"canonical_sha256":"d31cb34e3f1bbd30d4198990a6a3dd97445b4e309fd8663ab9f0cab51bab1f0b","source":{"kind":"arxiv","id":"2409.01447","version":3},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2409.01447","created_at":"2026-06-26T01:15:12Z"},{"alias_kind":"arxiv_version","alias_value":"2409.01447v3","created_at":"2026-06-26T01:15:12Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2409.01447","created_at":"2026-06-26T01:15:12Z"},{"alias_kind":"pith_short_12","alias_value":"2MOLGTR7DO6T","created_at":"2026-06-26T01:15:12Z"},{"alias_kind":"pith_short_16","alias_value":"2MOLGTR7DO6TBVAZ","created_at":"2026-06-26T01:15:12Z"},{"alias_kind":"pith_short_8","alias_value":"2MOLGTR7","created_at":"2026-06-26T01:15:12Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2024:2MOLGTR7DO6TBVAZRGIKNI65S5","target":"record","payload":{"canonical_record":{"source":{"id":"2409.01447","kind":"arxiv","version":3},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2024-09-02T20:07:25Z","cross_cats_sorted":["cs.GT"],"title_canon_sha256":"9106acd78f7819b6e7adee621d32daa40beb3a0f97335331ce4ed0ebe6e349bf","abstract_canon_sha256":"8a093075e4ff0ec6160d1cde3f1a7b625819448906af04cf83c781b3df60d468"},"schema_version":"1.0"},"canonical_sha256":"d31cb34e3f1bbd30d4198990a6a3dd97445b4e309fd8663ab9f0cab51bab1f0b","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-26T01:15:12.784234Z","signature_b64":"mkr4tEO0wwdStnoyUMALB7qbSdillFa1kkHAms9J7w4vRblmS1GFwArYy9n1cgg2dMiw8AP/EEhDjGi+26QlDg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"d31cb34e3f1bbd30d4198990a6a3dd97445b4e309fd8663ab9f0cab51bab1f0b","last_reissued_at":"2026-06-26T01:15:12.783738Z","signature_status":"signed_v1","first_computed_at":"2026-06-26T01:15:12.783738Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2409.01447","source_version":3,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-26T01:15:12Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"RwfqLWnYwAXUjQMV825aQB3Lt1n7cNDCSrnIZOCpQcNmmfncGPZOuqy9nSPwLdevYgSKX0CfBexC8Y415EbDBw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-07-04T09:10:35.469202Z"},"content_sha256":"158100cfcdd0fab2cd8339e89b1ed68b3a70bf287245ea25b09224950cc41df9","schema_version":"1.0","event_id":"sha256:158100cfcdd0fab2cd8339e89b1ed68b3a70bf287245ea25b09224950cc41df9"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2024:2MOLGTR7DO6TBVAZRGIKNI65S5","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Decentralized Best-Response-Based Learning in Two-Player Zero-Sum Stochastic Games: A Finite-Sample Analysis","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.GT"],"primary_cat":"cs.LG","authors_text":"Adam Wierman, Asuman Ozdaglar, Eric Mazumdar, Kaiqing Zhang, Zaiwei Chen","submitted_at":"2024-09-02T20:07:25Z","abstract_excerpt":"We present a finite-sample analysis of decentralized learning in two-player zero-sum matrix games and stochastic games, with a focus on best-response-based learning algorithms. In matrix games, the learning algorithm is payoff-based and symmetric: each player updates its policy using only its own payoff observations, incrementally moving toward an estimated smoothed best response to the opponent's latest policy. For stochastic games, we build on this matrix-game primitive to develop a learning algorithm called value iteration with smoothed best response (VI-SBR), which combines smoothed-best-r"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2409.01447","kind":"arxiv","version":3},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2409.01447/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-26T01:15:12Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"GLSqvoYGyseVSjfC4Z7RN1uaX78J/gOPOzmLbkBQ5St51NOr8HWWwYf/p/ZAwwmT/oVlqgOASnyfD752LO50Cw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-07-04T09:10:35.469594Z"},"content_sha256":"2b12cd54b920c3af6435d7a14b5127bed386341f832294c66ef48247e81a73a3","schema_version":"1.0","event_id":"sha256:2b12cd54b920c3af6435d7a14b5127bed386341f832294c66ef48247e81a73a3"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/2MOLGTR7DO6TBVAZRGIKNI65S5/bundle.json","state_url":"https://pith.science/pith/2MOLGTR7DO6TBVAZRGIKNI65S5/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/2MOLGTR7DO6TBVAZRGIKNI65S5/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-07-04T09:10:35Z","links":{"resolver":"https://pith.science/pith/2MOLGTR7DO6TBVAZRGIKNI65S5","bundle":"https://pith.science/pith/2MOLGTR7DO6TBVAZRGIKNI65S5/bundle.json","state":"https://pith.science/pith/2MOLGTR7DO6TBVAZRGIKNI65S5/state.json","well_known_bundle":"https://pith.science/.well-known/pith/2MOLGTR7DO6TBVAZRGIKNI65S5/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2024:2MOLGTR7DO6TBVAZRGIKNI65S5","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"8a093075e4ff0ec6160d1cde3f1a7b625819448906af04cf83c781b3df60d468","cross_cats_sorted":["cs.GT"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2024-09-02T20:07:25Z","title_canon_sha256":"9106acd78f7819b6e7adee621d32daa40beb3a0f97335331ce4ed0ebe6e349bf"},"schema_version":"1.0","source":{"id":"2409.01447","kind":"arxiv","version":3}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2409.01447","created_at":"2026-06-26T01:15:12Z"},{"alias_kind":"arxiv_version","alias_value":"2409.01447v3","created_at":"2026-06-26T01:15:12Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2409.01447","created_at":"2026-06-26T01:15:12Z"},{"alias_kind":"pith_short_12","alias_value":"2MOLGTR7DO6T","created_at":"2026-06-26T01:15:12Z"},{"alias_kind":"pith_short_16","alias_value":"2MOLGTR7DO6TBVAZ","created_at":"2026-06-26T01:15:12Z"},{"alias_kind":"pith_short_8","alias_value":"2MOLGTR7","created_at":"2026-06-26T01:15:12Z"}],"graph_snapshots":[{"event_id":"sha256:2b12cd54b920c3af6435d7a14b5127bed386341f832294c66ef48247e81a73a3","target":"graph","created_at":"2026-06-26T01:15:12Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2409.01447/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"We present a finite-sample analysis of decentralized learning in two-player zero-sum matrix games and stochastic games, with a focus on best-response-based learning algorithms. In matrix games, the learning algorithm is payoff-based and symmetric: each player updates its policy using only its own payoff observations, incrementally moving toward an estimated smoothed best response to the opponent's latest policy. For stochastic games, we build on this matrix-game primitive to develop a learning algorithm called value iteration with smoothed best response (VI-SBR), which combines smoothed-best-r","authors_text":"Adam Wierman, Asuman Ozdaglar, Eric Mazumdar, Kaiqing Zhang, Zaiwei Chen","cross_cats":["cs.GT"],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2024-09-02T20:07:25Z","title":"Decentralized Best-Response-Based Learning in Two-Player Zero-Sum Stochastic Games: A Finite-Sample Analysis"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2409.01447","kind":"arxiv","version":3},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:158100cfcdd0fab2cd8339e89b1ed68b3a70bf287245ea25b09224950cc41df9","target":"record","created_at":"2026-06-26T01:15:12Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"8a093075e4ff0ec6160d1cde3f1a7b625819448906af04cf83c781b3df60d468","cross_cats_sorted":["cs.GT"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2024-09-02T20:07:25Z","title_canon_sha256":"9106acd78f7819b6e7adee621d32daa40beb3a0f97335331ce4ed0ebe6e349bf"},"schema_version":"1.0","source":{"id":"2409.01447","kind":"arxiv","version":3}},"canonical_sha256":"d31cb34e3f1bbd30d4198990a6a3dd97445b4e309fd8663ab9f0cab51bab1f0b","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"d31cb34e3f1bbd30d4198990a6a3dd97445b4e309fd8663ab9f0cab51bab1f0b","first_computed_at":"2026-06-26T01:15:12.783738Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-06-26T01:15:12.783738Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"mkr4tEO0wwdStnoyUMALB7qbSdillFa1kkHAms9J7w4vRblmS1GFwArYy9n1cgg2dMiw8AP/EEhDjGi+26QlDg==","signature_status":"signed_v1","signed_at":"2026-06-26T01:15:12.784234Z","signed_message":"canonical_sha256_bytes"},"source_id":"2409.01447","source_kind":"arxiv","source_version":3}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:158100cfcdd0fab2cd8339e89b1ed68b3a70bf287245ea25b09224950cc41df9","sha256:2b12cd54b920c3af6435d7a14b5127bed386341f832294c66ef48247e81a73a3"],"state_sha256":"442a817bf76c61f31312a56af36f29c541217da46b954a68bb2d7082f3efe542"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"AoxDPKVPQs4IxNky8LCgY0iu8xd4fQQc8bINeRGW+qtr/1Dp9HuVQjC1wIv/eKeZiYlMsZG1a0flEP3s3mBOCw==","signed_message":"bundle_sha256_bytes","signed_at":"2026-07-04T09:10:35.471604Z","bundle_sha256":"fb415aedc646bd99d796f43a5a141121890f8fddef7ab7fd635e9b27ea10753f"}}