{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2018:DK4Q6YTSUHEPXOPH63BWXJIAJB","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"34e35a4b2746699657771f44572ad58ce919ea3227a43b9f68d5c633a0d86b80","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2018-02-09T12:03:15Z","title_canon_sha256":"fe69ec5fd359898d4c7daaa155e0dc8e31e70a42bec7cc1fd76c2c1cab639c67"},"schema_version":"1.0","source":{"id":"1802.03216","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1802.03216","created_at":"2026-05-17T23:56:48Z"},{"alias_kind":"arxiv_version","alias_value":"1802.03216v2","created_at":"2026-05-17T23:56:48Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1802.03216","created_at":"2026-05-17T23:56:48Z"},{"alias_kind":"pith_short_12","alias_value":"DK4Q6YTSUHEP","created_at":"2026-05-18T12:32:19Z"},{"alias_kind":"pith_short_16","alias_value":"DK4Q6YTSUHEPXOPH","created_at":"2026-05-18T12:32:19Z"},{"alias_kind":"pith_short_8","alias_value":"DK4Q6YTS","created_at":"2026-05-18T12:32:19Z"}],"graph_snapshots":[{"event_id":"sha256:5c869bf584c436d909c10464406a680207ccc40cd90b58d667f5c49160562204","target":"graph","created_at":"2026-05-17T23:56:48Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Within the context of video games the notion of perfectly rational agents can be undesirable as it leads to uninteresting situations, where humans face tough adversarial decision makers. Current frameworks for stochastic games and reinforcement learning prohibit tuneable strategies as they seek optimal performance. In this paper, we enable such tuneable behaviour by generalising soft Q-learning to stochastic games, where more than one agent interact strategically. We contribute both theoretically and empirically. On the theory side, we show that games with soft Q-learning exhibit a unique valu","authors_text":"Felix Leibfried, Haitham Bou-Ammar, Jordi Grau-Moya","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2018-02-09T12:03:15Z","title":"Balancing Two-Player Stochastic Games with Soft Q-Learning"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1802.03216","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:95c4943f2adad690cf165d777b4d19baec46c11be7ce2cbd2dbecd9092b1ab04","target":"record","created_at":"2026-05-17T23:56:48Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"34e35a4b2746699657771f44572ad58ce919ea3227a43b9f68d5c633a0d86b80","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2018-02-09T12:03:15Z","title_canon_sha256":"fe69ec5fd359898d4c7daaa155e0dc8e31e70a42bec7cc1fd76c2c1cab639c67"},"schema_version":"1.0","source":{"id":"1802.03216","kind":"arxiv","version":2}},"canonical_sha256":"1ab90f6272a1c8fbb9e7f6c36ba500485f61792db133b5f16bebb971a7cacded","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"1ab90f6272a1c8fbb9e7f6c36ba500485f61792db133b5f16bebb971a7cacded","first_computed_at":"2026-05-17T23:56:48.391988Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:56:48.391988Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"1OejaILtyvvMzDmd5P72fzCgPegy+inEhDmquOH/8l0U5/zVk65c0z/YbKsBw0NfEn2/j3cbpoqA/d8lOI6bDg==","signature_status":"signed_v1","signed_at":"2026-05-17T23:56:48.392394Z","signed_message":"canonical_sha256_bytes"},"source_id":"1802.03216","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:95c4943f2adad690cf165d777b4d19baec46c11be7ce2cbd2dbecd9092b1ab04","sha256:5c869bf584c436d909c10464406a680207ccc40cd90b58d667f5c49160562204"],"state_sha256":"66335beafbc7b55826788a7baba1afa30d3d1eff2323a6e1c35599d16fe6a533"}