{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2016:CKHJTZ3XHOTHFYBYAAV3YC4A5X","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"16451f184f26c65ada39674d47b8b1a46977d12518d8a83cadae2ecea14bc045","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2016-06-23T16:58:33Z","title_canon_sha256":"7747ce6c768286cad056f0ecae19639e64a49e77adca117b39f5133feca1200d"},"schema_version":"1.0","source":{"id":"1606.07374","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1606.07374","created_at":"2026-05-18T01:10:47Z"},{"alias_kind":"arxiv_version","alias_value":"1606.07374v2","created_at":"2026-05-18T01:10:47Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1606.07374","created_at":"2026-05-18T01:10:47Z"},{"alias_kind":"pith_short_12","alias_value":"CKHJTZ3XHOTH","created_at":"2026-05-18T12:30:09Z"},{"alias_kind":"pith_short_16","alias_value":"CKHJTZ3XHOTHFYBY","created_at":"2026-05-18T12:30:09Z"},{"alias_kind":"pith_short_8","alias_value":"CKHJTZ3X","created_at":"2026-05-18T12:30:09Z"}],"graph_snapshots":[{"event_id":"sha256:130115885901658052164f028fc6e796dd39c59c4b1ce806436452033f4ea72b","target":"graph","created_at":"2026-05-18T01:10:47Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Szubert and Jaskowski successfully used temporal difference (TD) learning together with n-tuple networks for playing the game 2048. However, we observed a phenomenon that the programs based on TD learning still hardly reach large tiles. In this paper, we propose multi-stage TD (MS-TD) learning, a kind of hierarchical reinforcement learning method, to effectively improve the performance for the rates of reaching large tiles, which are good metrics to analyze the strength of 2048 programs. Our experiments showed significant improvements over the one without using MS-TD learning. Namely, using 3-","authors_text":"Chao-Chin Liang, Chia-Chuan Chang, Chu-Hsuan Hsueh, Han Chiang, I-Chen Wu, Kun-Hao Yeh","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2016-06-23T16:58:33Z","title":"Multi-Stage Temporal Difference Learning for 2048-like Games"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1606.07374","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:4748eb7425644bc925487dacbffde61d88473ce8a38de3af83a585e9c0a74504","target":"record","created_at":"2026-05-18T01:10:47Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"16451f184f26c65ada39674d47b8b1a46977d12518d8a83cadae2ecea14bc045","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2016-06-23T16:58:33Z","title_canon_sha256":"7747ce6c768286cad056f0ecae19639e64a49e77adca117b39f5133feca1200d"},"schema_version":"1.0","source":{"id":"1606.07374","kind":"arxiv","version":2}},"canonical_sha256":"128e99e7773ba672e038002bbc0b80edf1a3302fe6514624f4a5ffac66f47628","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"128e99e7773ba672e038002bbc0b80edf1a3302fe6514624f4a5ffac66f47628","first_computed_at":"2026-05-18T01:10:47.134900Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T01:10:47.134900Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"X/ZNZGr+qX3jP2GrYwCtmazhtp/YlNwb5DxMFxQ2NmwM8eut135vIJh/Yylbit1oy3lnSem2dfE629sRAvh+BQ==","signature_status":"signed_v1","signed_at":"2026-05-18T01:10:47.135512Z","signed_message":"canonical_sha256_bytes"},"source_id":"1606.07374","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:4748eb7425644bc925487dacbffde61d88473ce8a38de3af83a585e9c0a74504","sha256:130115885901658052164f028fc6e796dd39c59c4b1ce806436452033f4ea72b"],"state_sha256":"da4cc2008a672fe0032988778248a1b79c5987471dce0745a581ad29349943d5"}