{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2018:TRCYHIZY247YGH2DP2BCEZV66L","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"f601a0c0f3dce0aa45ffd202a827477fd77441751664bd09b013ffc0b696f405","cross_cats_sorted":["cs.AI","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-06-10T06:17:53Z","title_canon_sha256":"9b6db23c801317ef612147a86f6c8a49bda1d73df2d9b47a377bd740f6d74192"},"schema_version":"1.0","source":{"id":"1806.06914","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1806.06914","created_at":"2026-05-18T00:12:58Z"},{"alias_kind":"arxiv_version","alias_value":"1806.06914v1","created_at":"2026-05-18T00:12:58Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1806.06914","created_at":"2026-05-18T00:12:58Z"},{"alias_kind":"pith_short_12","alias_value":"TRCYHIZY247Y","created_at":"2026-05-18T12:32:56Z"},{"alias_kind":"pith_short_16","alias_value":"TRCYHIZY247YGH2D","created_at":"2026-05-18T12:32:56Z"},{"alias_kind":"pith_short_8","alias_value":"TRCYHIZY","created_at":"2026-05-18T12:32:56Z"}],"graph_snapshots":[{"event_id":"sha256:d8e5e08de57726900dcc8a3fe9f1d586b95855a03f6919d3cb1d40e79a8ae901","target":"graph","created_at":"2026-05-18T00:12:58Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"In traditional reinforcement learning, an agent maximizes the reward collected during its interaction with the environment by approximating the optimal policy through the estimation of value functions. Typically, given a state s and action a, the corresponding value is the expected discounted sum of rewards. The optimal action is then chosen to be the action a with the largest value estimated by value function. However, recent developments have shown both theoretical and experimental evidence of superior performance when value function is replaced with value distribution in context of deep Q l","authors_text":"Selina Bing, Shangda Li, Steven Yang","cross_cats":["cs.AI","stat.ML"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-06-10T06:17:53Z","title":"Distributional Advantage Actor-Critic"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1806.06914","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:35dafd7775d3fed2cff0957f7ce88de3b4aa43b68b7661094e6bb5d65703bebc","target":"record","created_at":"2026-05-18T00:12:58Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"f601a0c0f3dce0aa45ffd202a827477fd77441751664bd09b013ffc0b696f405","cross_cats_sorted":["cs.AI","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-06-10T06:17:53Z","title_canon_sha256":"9b6db23c801317ef612147a86f6c8a49bda1d73df2d9b47a377bd740f6d74192"},"schema_version":"1.0","source":{"id":"1806.06914","kind":"arxiv","version":1}},"canonical_sha256":"9c4583a338d73f831f437e822266bef2d268b7b241079bcb160b9ba8dc6cb9c9","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"9c4583a338d73f831f437e822266bef2d268b7b241079bcb160b9ba8dc6cb9c9","first_computed_at":"2026-05-18T00:12:58.106179Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:12:58.106179Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"FeQ+JoiMjCclQHzKp8og/AiBY8SQweffttJ1KJ6xI7O23m4ex5exd+xCylQ1DBQ8NTAOS0a25iXK38M67Z2wDA==","signature_status":"signed_v1","signed_at":"2026-05-18T00:12:58.106823Z","signed_message":"canonical_sha256_bytes"},"source_id":"1806.06914","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:35dafd7775d3fed2cff0957f7ce88de3b4aa43b68b7661094e6bb5d65703bebc","sha256:d8e5e08de57726900dcc8a3fe9f1d586b95855a03f6919d3cb1d40e79a8ae901"],"state_sha256":"fa4d20a35460434bc57ca10ff5f31cec63b8d242764635496a91f9d4e87dcc08"}