{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2016:EHIFKRAWID45HVAS6OD7MEJQCS","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"1355de6b4147b524ec9564f997b60a63bb11890807b6eba2399c6c3e6f562db6","cross_cats_sorted":["stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2016-06-24T10:54:41Z","title_canon_sha256":"dbb1dabd7ebc87403b96783c6f0f9e16fd4822193e4a4e2a3fc871aae992554b"},"schema_version":"1.0","source":{"id":"1606.07636","kind":"arxiv","version":3}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1606.07636","created_at":"2026-05-18T00:28:16Z"},{"alias_kind":"arxiv_version","alias_value":"1606.07636v3","created_at":"2026-05-18T00:28:16Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1606.07636","created_at":"2026-05-18T00:28:16Z"},{"alias_kind":"pith_short_12","alias_value":"EHIFKRAWID45","created_at":"2026-05-18T12:30:12Z"},{"alias_kind":"pith_short_16","alias_value":"EHIFKRAWID45HVAS","created_at":"2026-05-18T12:30:12Z"},{"alias_kind":"pith_short_8","alias_value":"EHIFKRAW","created_at":"2026-05-18T12:30:12Z"}],"graph_snapshots":[{"event_id":"sha256:a1a2323c06ce9bf852ec89a3c21b4b0fb30f9acff7158e58673d7b0ba51586c7","target":"graph","created_at":"2026-05-18T00:28:16Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"This paper aims at theoretically and empirically comparing two standard optimization criteria for Reinforcement Learning: i) maximization of the mean value and ii) minimization of the Bellman residual. For that purpose, we place ourselves in the framework of policy search algorithms, that are usually designed to maximize the mean value, and derive a method that minimizes the residual $\\|T_* v_\\pi - v_\\pi\\|_{1,\\nu}$ over policies. A theoretical analysis shows how good this proxy is to policy optimization, and notably that it is better than its value-based counterpart. We also propose experiment","authors_text":"Bilal Piot, Matthieu Geist, Olivier Pietquin","cross_cats":["stat.ML"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2016-06-24T10:54:41Z","title":"Is the Bellman residual a bad proxy?"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1606.07636","kind":"arxiv","version":3},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:985e6b667b288d90ddc8bd703e2a21f76cce09a252d8518dc230fccc6ef33ead","target":"record","created_at":"2026-05-18T00:28:16Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"1355de6b4147b524ec9564f997b60a63bb11890807b6eba2399c6c3e6f562db6","cross_cats_sorted":["stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2016-06-24T10:54:41Z","title_canon_sha256":"dbb1dabd7ebc87403b96783c6f0f9e16fd4822193e4a4e2a3fc871aae992554b"},"schema_version":"1.0","source":{"id":"1606.07636","kind":"arxiv","version":3}},"canonical_sha256":"21d055441640f9d3d412f387f611301498cdf454eaf52b1191b752942feb8904","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"21d055441640f9d3d412f387f611301498cdf454eaf52b1191b752942feb8904","first_computed_at":"2026-05-18T00:28:16.612300Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:28:16.612300Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"P/Pts1De4sSVSYuMemMyJTNmlzX/j4rJVDjkuRyc0Ykm5M6JqyYkyLH8rMX2FgpYB5qXkD27xRBRqlEJORGCAQ==","signature_status":"signed_v1","signed_at":"2026-05-18T00:28:16.613118Z","signed_message":"canonical_sha256_bytes"},"source_id":"1606.07636","source_kind":"arxiv","source_version":3}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:985e6b667b288d90ddc8bd703e2a21f76cce09a252d8518dc230fccc6ef33ead","sha256:a1a2323c06ce9bf852ec89a3c21b4b0fb30f9acff7158e58673d7b0ba51586c7"],"state_sha256":"a373a65e71842859d87f29cfa9ae95e7aa3efb2db53867bfb289c89b68e1c02f"}