{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2018:A7XVV5MNREPWEEBENR6SIGDVDE","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"9fb330787100c9542d30ff1b9d88d59acb4d22c9808bdc2a9fe070ed60be2f5f","cross_cats_sorted":["cs.LG","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2018-02-26T17:54:49Z","title_canon_sha256":"4f79674a0374b3a99bbf08a21a9cc1a9988de5a28a63721151c87e960682f02e"},"schema_version":"1.0","source":{"id":"1802.09477","kind":"arxiv","version":3}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1802.09477","created_at":"2026-05-18T00:02:45Z"},{"alias_kind":"arxiv_version","alias_value":"1802.09477v3","created_at":"2026-05-18T00:02:45Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1802.09477","created_at":"2026-05-18T00:02:45Z"},{"alias_kind":"pith_short_12","alias_value":"A7XVV5MNREPW","created_at":"2026-05-18T12:32:13Z"},{"alias_kind":"pith_short_16","alias_value":"A7XVV5MNREPWEEBE","created_at":"2026-05-18T12:32:13Z"},{"alias_kind":"pith_short_8","alias_value":"A7XVV5MN","created_at":"2026-05-18T12:32:13Z"}],"graph_snapshots":[{"event_id":"sha256:ca446bc7ad8c6d88531444cc5394380ec8bc809c9ba1ec9b0d4bcb72b833c7f7","target":"graph","created_at":"2026-05-18T00:02:45Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"In value-based reinforcement learning methods such as deep Q-learning, function approximation errors are known to lead to overestimated value estimates and suboptimal policies. We show that this problem persists in an actor-critic setting and propose novel mechanisms to minimize its effects on both the actor and the critic. Our algorithm builds on Double Q-learning, by taking the minimum value between a pair of critics to limit overestimation. We draw the connection between target networks and overestimation bias, and suggest delaying policy updates to reduce per-update error and further impro","authors_text":"David Meger, Herke van Hoof, Scott Fujimoto","cross_cats":["cs.LG","stat.ML"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2018-02-26T17:54:49Z","title":"Addressing Function Approximation Error in Actor-Critic Methods"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1802.09477","kind":"arxiv","version":3},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:9891f59347341987c1295535f8237f25e651ea1640eb0ab7f0738a5511286398","target":"record","created_at":"2026-05-18T00:02:45Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"9fb330787100c9542d30ff1b9d88d59acb4d22c9808bdc2a9fe070ed60be2f5f","cross_cats_sorted":["cs.LG","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2018-02-26T17:54:49Z","title_canon_sha256":"4f79674a0374b3a99bbf08a21a9cc1a9988de5a28a63721151c87e960682f02e"},"schema_version":"1.0","source":{"id":"1802.09477","kind":"arxiv","version":3}},"canonical_sha256":"07ef5af58d891f6210246c7d241875190a39605b8860d66cd160a0cf9e569857","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"07ef5af58d891f6210246c7d241875190a39605b8860d66cd160a0cf9e569857","first_computed_at":"2026-05-18T00:02:45.178554Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:02:45.178554Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"0nC1hskNRuJ1N3zAdOIqEKGMzzd3/8DTvLJBxnGyJo9taRTdB+x9fL0xYHPQl9lkeYRSnMFvi3DZ/mmo4VMyDw==","signature_status":"signed_v1","signed_at":"2026-05-18T00:02:45.178978Z","signed_message":"canonical_sha256_bytes"},"source_id":"1802.09477","source_kind":"arxiv","source_version":3}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:9891f59347341987c1295535f8237f25e651ea1640eb0ab7f0738a5511286398","sha256:ca446bc7ad8c6d88531444cc5394380ec8bc809c9ba1ec9b0d4bcb72b833c7f7"],"state_sha256":"8e9588debf42bfa8aa07fe081e901f259d3b262f69f6e375e332f64db7fa9595"}