{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2018:NETGXAYQ5BGXZ2M7E73Y7GL4SR","short_pith_number":"pith:NETGXAYQ","schema_version":"1.0","canonical_sha256":"69266b8310e84d7ce99f27f78f997c947b0f23329701d07d1149f7a8b2b48b72","source":{"kind":"arxiv","id":"1805.07732","version":3},"attestation_state":"computed","paper":{"title":"Nonlinear Distributional Gradient Temporal-Difference Learning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","stat.ML"],"primary_cat":"cs.LG","authors_text":"Chao Qu, Huan Xu, Shie Mannor","submitted_at":"2018-05-20T08:43:05Z","abstract_excerpt":"We devise a distributional variant of gradient temporal-difference (TD) learning. Distributional reinforcement learning has been demonstrated to outperform the regular one in the recent study \\citep{bellemare2017distributional}. In the policy evaluation setting, we design two new algorithms called distributional GTD2 and distributional TDC using the Cram{\\'e}r distance on the distributional version of the Bellman error objective function, which inherits advantages of both the nonlinear gradient TD algorithms and the distributional RL approach. In the control setting, we propose the distributio"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1805.07732","kind":"arxiv","version":3},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-05-20T08:43:05Z","cross_cats_sorted":["cs.AI","stat.ML"],"title_canon_sha256":"3e4274f579ad419fce1e9d8b727a437276ab6d32a377b5710587518bd95f2bb3","abstract_canon_sha256":"8a1d560dedf2e5fb2f66190da42ab83ffe0c9fa8b868328f9a4180ea66592063"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:49:33.603535Z","signature_b64":"lgMXfaKjDX2vhmzBm6F6L38+o15PKih9BKkLMh7jzbpjeQSyFtLlVXl403yBmqzf9ocweg8fWPCNTKoR2YJqBA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"69266b8310e84d7ce99f27f78f997c947b0f23329701d07d1149f7a8b2b48b72","last_reissued_at":"2026-05-17T23:49:33.603034Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:49:33.603034Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Nonlinear Distributional Gradient Temporal-Difference Learning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","stat.ML"],"primary_cat":"cs.LG","authors_text":"Chao Qu, Huan Xu, Shie Mannor","submitted_at":"2018-05-20T08:43:05Z","abstract_excerpt":"We devise a distributional variant of gradient temporal-difference (TD) learning. Distributional reinforcement learning has been demonstrated to outperform the regular one in the recent study \\citep{bellemare2017distributional}. In the policy evaluation setting, we design two new algorithms called distributional GTD2 and distributional TDC using the Cram{\\'e}r distance on the distributional version of the Bellman error objective function, which inherits advantages of both the nonlinear gradient TD algorithms and the distributional RL approach. In the control setting, we propose the distributio"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1805.07732","kind":"arxiv","version":3},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1805.07732","created_at":"2026-05-17T23:49:33.603110+00:00"},{"alias_kind":"arxiv_version","alias_value":"1805.07732v3","created_at":"2026-05-17T23:49:33.603110+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1805.07732","created_at":"2026-05-17T23:49:33.603110+00:00"},{"alias_kind":"pith_short_12","alias_value":"NETGXAYQ5BGX","created_at":"2026-05-18T12:32:40.477152+00:00"},{"alias_kind":"pith_short_16","alias_value":"NETGXAYQ5BGXZ2M7","created_at":"2026-05-18T12:32:40.477152+00:00"},{"alias_kind":"pith_short_8","alias_value":"NETGXAYQ","created_at":"2026-05-18T12:32:40.477152+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/NETGXAYQ5BGXZ2M7E73Y7GL4SR","json":"https://pith.science/pith/NETGXAYQ5BGXZ2M7E73Y7GL4SR.json","graph_json":"https://pith.science/api/pith-number/NETGXAYQ5BGXZ2M7E73Y7GL4SR/graph.json","events_json":"https://pith.science/api/pith-number/NETGXAYQ5BGXZ2M7E73Y7GL4SR/events.json","paper":"https://pith.science/paper/NETGXAYQ"},"agent_actions":{"view_html":"https://pith.science/pith/NETGXAYQ5BGXZ2M7E73Y7GL4SR","download_json":"https://pith.science/pith/NETGXAYQ5BGXZ2M7E73Y7GL4SR.json","view_paper":"https://pith.science/paper/NETGXAYQ","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1805.07732&json=true","fetch_graph":"https://pith.science/api/pith-number/NETGXAYQ5BGXZ2M7E73Y7GL4SR/graph.json","fetch_events":"https://pith.science/api/pith-number/NETGXAYQ5BGXZ2M7E73Y7GL4SR/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/NETGXAYQ5BGXZ2M7E73Y7GL4SR/action/timestamp_anchor","attest_storage":"https://pith.science/pith/NETGXAYQ5BGXZ2M7E73Y7GL4SR/action/storage_attestation","attest_author":"https://pith.science/pith/NETGXAYQ5BGXZ2M7E73Y7GL4SR/action/author_attestation","sign_citation":"https://pith.science/pith/NETGXAYQ5BGXZ2M7E73Y7GL4SR/action/citation_signature","submit_replication":"https://pith.science/pith/NETGXAYQ5BGXZ2M7E73Y7GL4SR/action/replication_record"}},"created_at":"2026-05-17T23:49:33.603110+00:00","updated_at":"2026-05-17T23:49:33.603110+00:00"}