{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2017:5WWCZMRJ6GFCMUSXPTTW2GM4WP","short_pith_number":"pith:5WWCZMRJ","schema_version":"1.0","canonical_sha256":"edac2cb229f18a2652577ce76d199cb3dfa06b1b2f0fe2f55e73da5dfad9689b","source":{"kind":"arxiv","id":"1707.01310","version":5},"attestation_state":"computed","paper":{"title":"Learning to Design Games: Strategic Environments in Reinforcement Learning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.AI","authors_text":"Haifeng Zhang, Jun Wang, Weinan Zhang, Wenxin Li, Ying Wen, Yong Yu, Zhiming Zhou","submitted_at":"2017-07-05T10:45:43Z","abstract_excerpt":"In typical reinforcement learning (RL), the environment is assumed given and the goal of the learning is to identify an optimal policy for the agent taking actions through its interactions with the environment. In this paper, we extend this setting by considering the environment is not given, but controllable and learnable through its interaction with the agent at the same time. This extension is motivated by environment design scenarios in the real-world, including game design, shopping space design and traffic signal design. Theoretically, we find a dual Markov decision process (MDP) w.r.t. "},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1707.01310","kind":"arxiv","version":5},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2017-07-05T10:45:43Z","cross_cats_sorted":[],"title_canon_sha256":"2451f249144b8eae8cefcdf534eeab926aaae748367a97fef2b8f6d5cf9186dd","abstract_canon_sha256":"f5f6ec4280f62120c841fa3b495c89e3cf5d25254e94b89290fb7e9a32678276"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-07-05T00:14:28.443119Z","signature_b64":"td8XRqVZaXTeLhjU2g4Cl0SDush19Ce/HUUPOVX2PM8EEBcjONWRDia9dMTRR2OouCaC6RCNoCe++L2sAHr+CQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"edac2cb229f18a2652577ce76d199cb3dfa06b1b2f0fe2f55e73da5dfad9689b","last_reissued_at":"2026-07-05T00:14:28.442625Z","signature_status":"signed_v1","first_computed_at":"2026-07-05T00:14:28.442625Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Learning to Design Games: Strategic Environments in Reinforcement Learning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.AI","authors_text":"Haifeng Zhang, Jun Wang, Weinan Zhang, Wenxin Li, Ying Wen, Yong Yu, Zhiming Zhou","submitted_at":"2017-07-05T10:45:43Z","abstract_excerpt":"In typical reinforcement learning (RL), the environment is assumed given and the goal of the learning is to identify an optimal policy for the agent taking actions through its interactions with the environment. In this paper, we extend this setting by considering the environment is not given, but controllable and learnable through its interaction with the agent at the same time. This extension is motivated by environment design scenarios in the real-world, including game design, shopping space design and traffic signal design. Theoretically, we find a dual Markov decision process (MDP) w.r.t. "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1707.01310","kind":"arxiv","version":5},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/1707.01310/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1707.01310","created_at":"2026-07-05T00:14:28.442681+00:00"},{"alias_kind":"arxiv_version","alias_value":"1707.01310v5","created_at":"2026-07-05T00:14:28.442681+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1707.01310","created_at":"2026-07-05T00:14:28.442681+00:00"},{"alias_kind":"pith_short_12","alias_value":"5WWCZMRJ6GFC","created_at":"2026-07-05T00:14:28.442681+00:00"},{"alias_kind":"pith_short_16","alias_value":"5WWCZMRJ6GFCMUSX","created_at":"2026-07-05T00:14:28.442681+00:00"},{"alias_kind":"pith_short_8","alias_value":"5WWCZMRJ","created_at":"2026-07-05T00:14:28.442681+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/5WWCZMRJ6GFCMUSXPTTW2GM4WP","json":"https://pith.science/pith/5WWCZMRJ6GFCMUSXPTTW2GM4WP.json","graph_json":"https://pith.science/api/pith-number/5WWCZMRJ6GFCMUSXPTTW2GM4WP/graph.json","events_json":"https://pith.science/api/pith-number/5WWCZMRJ6GFCMUSXPTTW2GM4WP/events.json","paper":"https://pith.science/paper/5WWCZMRJ"},"agent_actions":{"view_html":"https://pith.science/pith/5WWCZMRJ6GFCMUSXPTTW2GM4WP","download_json":"https://pith.science/pith/5WWCZMRJ6GFCMUSXPTTW2GM4WP.json","view_paper":"https://pith.science/paper/5WWCZMRJ","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1707.01310&json=true","fetch_graph":"https://pith.science/api/pith-number/5WWCZMRJ6GFCMUSXPTTW2GM4WP/graph.json","fetch_events":"https://pith.science/api/pith-number/5WWCZMRJ6GFCMUSXPTTW2GM4WP/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/5WWCZMRJ6GFCMUSXPTTW2GM4WP/action/timestamp_anchor","attest_storage":"https://pith.science/pith/5WWCZMRJ6GFCMUSXPTTW2GM4WP/action/storage_attestation","attest_author":"https://pith.science/pith/5WWCZMRJ6GFCMUSXPTTW2GM4WP/action/author_attestation","sign_citation":"https://pith.science/pith/5WWCZMRJ6GFCMUSXPTTW2GM4WP/action/citation_signature","submit_replication":"https://pith.science/pith/5WWCZMRJ6GFCMUSXPTTW2GM4WP/action/replication_record"}},"created_at":"2026-07-05T00:14:28.442681+00:00","updated_at":"2026-07-05T00:14:28.442681+00:00"}