{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2012:XLK4HRTGXTL4MFLQHJW6F4SRBT","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"94538e1eccf39b8ccc79ef6f8ad8dc5d422e40e771ff0b3bb94aca68f6654a21","cross_cats_sorted":["stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2012-10-16T17:47:32Z","title_canon_sha256":"ffe054c4bd96efe7a0caa091786bf6e6e535410b9f0f7e80deafb932b3d00ff6"},"schema_version":"1.0","source":{"id":"1210.4893","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1210.4893","created_at":"2026-05-18T03:42:55Z"},{"alias_kind":"arxiv_version","alias_value":"1210.4893v1","created_at":"2026-05-18T03:42:55Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1210.4893","created_at":"2026-05-18T03:42:55Z"},{"alias_kind":"pith_short_12","alias_value":"XLK4HRTGXTL4","created_at":"2026-05-18T12:27:27Z"},{"alias_kind":"pith_short_16","alias_value":"XLK4HRTGXTL4MFLQ","created_at":"2026-05-18T12:27:27Z"},{"alias_kind":"pith_short_8","alias_value":"XLK4HRTG","created_at":"2026-05-18T12:27:27Z"}],"graph_snapshots":[{"event_id":"sha256:9c214a20e14f2135d476924bf0963d0d7cb572e8c387006bc870b44ada712fcf","target":"graph","created_at":"2026-05-18T03:42:55Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"This paper explores a new framework for reinforcement learning based on online convex optimization, in particular mirror descent and related algorithms. Mirror descent can be viewed as an enhanced gradient method, particularly suited to minimization of convex functions in highdimensional spaces. Unlike traditional gradient methods, mirror descent undertakes gradient updates of weights in both the dual space and primal space, which are linked together using a Legendre transform. Mirror descent can be viewed as a proximal algorithm where the distance generating function used is a Bregman diverge","authors_text":"Bo Liu, Sridhar Mahadevan","cross_cats":["stat.ML"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2012-10-16T17:47:32Z","title":"Sparse Q-learning with Mirror Descent"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1210.4893","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:b6270637446f500d76f50405bed8783b53e535bbe94e3b7871a64a27bd9a32f1","target":"record","created_at":"2026-05-18T03:42:55Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"94538e1eccf39b8ccc79ef6f8ad8dc5d422e40e771ff0b3bb94aca68f6654a21","cross_cats_sorted":["stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2012-10-16T17:47:32Z","title_canon_sha256":"ffe054c4bd96efe7a0caa091786bf6e6e535410b9f0f7e80deafb932b3d00ff6"},"schema_version":"1.0","source":{"id":"1210.4893","kind":"arxiv","version":1}},"canonical_sha256":"bad5c3c666bcd7c615703a6de2f2510cd685746ac7bbde2d8a689282c5f4a624","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"bad5c3c666bcd7c615703a6de2f2510cd685746ac7bbde2d8a689282c5f4a624","first_computed_at":"2026-05-18T03:42:55.116255Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T03:42:55.116255Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"yey/YX46dqv4C+ygdrI5QXZgxB/sU3KBRv/WJYPOup+b26Aw474FMOpRP0zHRDqPP2Ik4nmHuLEYC19EypmSDQ==","signature_status":"signed_v1","signed_at":"2026-05-18T03:42:55.116981Z","signed_message":"canonical_sha256_bytes"},"source_id":"1210.4893","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:b6270637446f500d76f50405bed8783b53e535bbe94e3b7871a64a27bd9a32f1","sha256:9c214a20e14f2135d476924bf0963d0d7cb572e8c387006bc870b44ada712fcf"],"state_sha256":"0e32d8f0b38cd83d319f2a5be4df8d5f82248a62e8d46b19ef48f2cb80c38769"}