{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2017:KEMWCFK63KIEULAR6ODLVSUHXM","short_pith_number":"pith:KEMWCFK6","canonical_record":{"source":{"id":"1712.10285","kind":"arxiv","version":4},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2017-12-29T17:27:59Z","cross_cats_sorted":[],"title_canon_sha256":"fb5b2d54b53e34c6b544dcfc9be636055f5b8a7431fa139b43965c3a6b39dd21","abstract_canon_sha256":"a74d63ae0d79b87112db3bb22bb77d8235132ca906d35747269e1512d86e131b"},"schema_version":"1.0"},"canonical_sha256":"511961155eda904a2c11f386baca87bb0796809aff761478f30658e6b7a0693c","source":{"kind":"arxiv","id":"1712.10285","version":4},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1712.10285","created_at":"2026-05-18T00:14:06Z"},{"alias_kind":"arxiv_version","alias_value":"1712.10285v4","created_at":"2026-05-18T00:14:06Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1712.10285","created_at":"2026-05-18T00:14:06Z"},{"alias_kind":"pith_short_12","alias_value":"KEMWCFK63KIE","created_at":"2026-05-18T12:31:24Z"},{"alias_kind":"pith_short_16","alias_value":"KEMWCFK63KIEULAR","created_at":"2026-05-18T12:31:24Z"},{"alias_kind":"pith_short_8","alias_value":"KEMWCFK6","created_at":"2026-05-18T12:31:24Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2017:KEMWCFK63KIEULAR6ODLVSUHXM","target":"record","payload":{"canonical_record":{"source":{"id":"1712.10285","kind":"arxiv","version":4},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2017-12-29T17:27:59Z","cross_cats_sorted":[],"title_canon_sha256":"fb5b2d54b53e34c6b544dcfc9be636055f5b8a7431fa139b43965c3a6b39dd21","abstract_canon_sha256":"a74d63ae0d79b87112db3bb22bb77d8235132ca906d35747269e1512d86e131b"},"schema_version":"1.0"},"canonical_sha256":"511961155eda904a2c11f386baca87bb0796809aff761478f30658e6b7a0693c","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:14:06.458110Z","signature_b64":"QVE8KPvHV0tPsrQTfXocZM3d9griGkH8pRrFhTBlwf58zFFFLx+VGDPIlmmC2VxRVqX5iyvxotpqxXKrtzPwDg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"511961155eda904a2c11f386baca87bb0796809aff761478f30658e6b7a0693c","last_reissued_at":"2026-05-18T00:14:06.457400Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:14:06.457400Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1712.10285","source_version":4,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:14:06Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"9pp7ct9U6HMmSGB1Ue+c+ZK/eEq3O8SJl0qh2MGOAeDakyUaNk/YQpzlcLrq0VpWPXDFhoqUpU5IODNyukAxCg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-25T02:43:23.503406Z"},"content_sha256":"32e5bbef386829cce6b4ce034c41eab59d9f2d350a72fed2ce5e7561a4d851ce","schema_version":"1.0","event_id":"sha256:32e5bbef386829cce6b4ce034c41eab59d9f2d350a72fed2ce5e7561a4d851ce"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2017:KEMWCFK63KIEULAR6ODLVSUHXM","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"SBEED: Convergent Reinforcement Learning with Nonlinear Function Approximation","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.LG","authors_text":"Albert Shaw, Bo Dai, Jianshu Chen, Le Song, Lihong Li, Lin Xiao, Niao He, Zhen Liu","submitted_at":"2017-12-29T17:27:59Z","abstract_excerpt":"When function approximation is used, solving the Bellman optimality equation with stability guarantees has remained a major open problem in reinforcement learning for decades. The fundamental difficulty is that the Bellman operator may become an expansion in general, resulting in oscillating and even divergent behavior of popular algorithms like Q-learning. In this paper, we revisit the Bellman equation, and reformulate it into a novel primal-dual optimization problem using Nesterov's smoothing technique and the Legendre-Fenchel transformation. We then develop a new algorithm, called Smoothed "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1712.10285","kind":"arxiv","version":4},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:14:06Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"hKapY3lcswxM/QZkBRmdMpCn/mIZDQkkhDodkSQMVMe6l4U7bHcxaUimqyUT2nxkpaY0xZadv4b4b+ZNNRaeDg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-25T02:43:23.503888Z"},"content_sha256":"99e36bb5b409f0d3ab54a619164cb335d19dedb7e0d04a69faf137870579cccf","schema_version":"1.0","event_id":"sha256:99e36bb5b409f0d3ab54a619164cb335d19dedb7e0d04a69faf137870579cccf"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/KEMWCFK63KIEULAR6ODLVSUHXM/bundle.json","state_url":"https://pith.science/pith/KEMWCFK63KIEULAR6ODLVSUHXM/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/KEMWCFK63KIEULAR6ODLVSUHXM/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-25T02:43:23Z","links":{"resolver":"https://pith.science/pith/KEMWCFK63KIEULAR6ODLVSUHXM","bundle":"https://pith.science/pith/KEMWCFK63KIEULAR6ODLVSUHXM/bundle.json","state":"https://pith.science/pith/KEMWCFK63KIEULAR6ODLVSUHXM/state.json","well_known_bundle":"https://pith.science/.well-known/pith/KEMWCFK63KIEULAR6ODLVSUHXM/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2017:KEMWCFK63KIEULAR6ODLVSUHXM","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"a74d63ae0d79b87112db3bb22bb77d8235132ca906d35747269e1512d86e131b","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2017-12-29T17:27:59Z","title_canon_sha256":"fb5b2d54b53e34c6b544dcfc9be636055f5b8a7431fa139b43965c3a6b39dd21"},"schema_version":"1.0","source":{"id":"1712.10285","kind":"arxiv","version":4}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1712.10285","created_at":"2026-05-18T00:14:06Z"},{"alias_kind":"arxiv_version","alias_value":"1712.10285v4","created_at":"2026-05-18T00:14:06Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1712.10285","created_at":"2026-05-18T00:14:06Z"},{"alias_kind":"pith_short_12","alias_value":"KEMWCFK63KIE","created_at":"2026-05-18T12:31:24Z"},{"alias_kind":"pith_short_16","alias_value":"KEMWCFK63KIEULAR","created_at":"2026-05-18T12:31:24Z"},{"alias_kind":"pith_short_8","alias_value":"KEMWCFK6","created_at":"2026-05-18T12:31:24Z"}],"graph_snapshots":[{"event_id":"sha256:99e36bb5b409f0d3ab54a619164cb335d19dedb7e0d04a69faf137870579cccf","target":"graph","created_at":"2026-05-18T00:14:06Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"When function approximation is used, solving the Bellman optimality equation with stability guarantees has remained a major open problem in reinforcement learning for decades. The fundamental difficulty is that the Bellman operator may become an expansion in general, resulting in oscillating and even divergent behavior of popular algorithms like Q-learning. In this paper, we revisit the Bellman equation, and reformulate it into a novel primal-dual optimization problem using Nesterov's smoothing technique and the Legendre-Fenchel transformation. We then develop a new algorithm, called Smoothed ","authors_text":"Albert Shaw, Bo Dai, Jianshu Chen, Le Song, Lihong Li, Lin Xiao, Niao He, Zhen Liu","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2017-12-29T17:27:59Z","title":"SBEED: Convergent Reinforcement Learning with Nonlinear Function Approximation"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1712.10285","kind":"arxiv","version":4},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:32e5bbef386829cce6b4ce034c41eab59d9f2d350a72fed2ce5e7561a4d851ce","target":"record","created_at":"2026-05-18T00:14:06Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"a74d63ae0d79b87112db3bb22bb77d8235132ca906d35747269e1512d86e131b","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2017-12-29T17:27:59Z","title_canon_sha256":"fb5b2d54b53e34c6b544dcfc9be636055f5b8a7431fa139b43965c3a6b39dd21"},"schema_version":"1.0","source":{"id":"1712.10285","kind":"arxiv","version":4}},"canonical_sha256":"511961155eda904a2c11f386baca87bb0796809aff761478f30658e6b7a0693c","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"511961155eda904a2c11f386baca87bb0796809aff761478f30658e6b7a0693c","first_computed_at":"2026-05-18T00:14:06.457400Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:14:06.457400Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"QVE8KPvHV0tPsrQTfXocZM3d9griGkH8pRrFhTBlwf58zFFFLx+VGDPIlmmC2VxRVqX5iyvxotpqxXKrtzPwDg==","signature_status":"signed_v1","signed_at":"2026-05-18T00:14:06.458110Z","signed_message":"canonical_sha256_bytes"},"source_id":"1712.10285","source_kind":"arxiv","source_version":4}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:32e5bbef386829cce6b4ce034c41eab59d9f2d350a72fed2ce5e7561a4d851ce","sha256:99e36bb5b409f0d3ab54a619164cb335d19dedb7e0d04a69faf137870579cccf"],"state_sha256":"c1490c2b0da47b2f56fc4f76044fcd06bc601741fa559faae12f12cb101f927c"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"vaCLfvWMuQd17s6k+2hKR9Rc/f9ny4CA18s0EUqkH7Z97rPo4H6ev1mBmKUBkVOY3cPR8KOXHJcCTN3GNMy+CA==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-25T02:43:23.507568Z","bundle_sha256":"243018e6b3d2a9cdcaedd244808696ae1d9756b29f9a6e8af5a4aa96aba21b2f"}}