{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2019:O7FOWDAIU7RKDBQFT27JUNZG57","short_pith_number":"pith:O7FOWDAI","schema_version":"1.0","canonical_sha256":"77caeb0c08a7e2a186059ebe9a3726efe6d17c59095c8855bfe0c0e55f2a604b","source":{"kind":"arxiv","id":"1902.07393","version":2},"attestation_state":"computed","paper":{"title":"Finite-Time Analysis of Distributed TD(0) with Linear Function Approximation for Multi-Agent Reinforcement Learning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"math.OC","authors_text":"Justin Romberg, Siva Theja Maguluri, Thinh T. Doan","submitted_at":"2019-02-20T03:52:47Z","abstract_excerpt":"We study the policy evaluation problem in multi-agent reinforcement learning. In this problem, a group of agents works cooperatively to evaluate the value function for the global discounted accumulative reward problem, which is composed of local rewards observed by the agents. Over a series of time steps, the agents act, get rewarded, update their local estimate of the value function, then communicate with their neighbors. The local update at each agent can be interpreted as a distributed consensus-based variant of the popular temporal difference learning algorithm TD(0).\n  While distributed r"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1902.07393","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"math.OC","submitted_at":"2019-02-20T03:52:47Z","cross_cats_sorted":[],"title_canon_sha256":"6471a9b16f5c99ca74352020f883cb51523c1d165d89de36da43503c743e1f95","abstract_canon_sha256":"0b586daa2295c56cd91cce549079eef5798bdf2c8064298b9dd308563bbad0ff"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:44:29.316239Z","signature_b64":"ebimlpB1VFLnOq6NmwUj3DpvsZD7bpI8hfZv6GBdj53SwBso34Sdc3yb5n7inZszAyLGeZmkIn6mitsW7aN+Aw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"77caeb0c08a7e2a186059ebe9a3726efe6d17c59095c8855bfe0c0e55f2a604b","last_reissued_at":"2026-05-17T23:44:29.315551Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:44:29.315551Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Finite-Time Analysis of Distributed TD(0) with Linear Function Approximation for Multi-Agent Reinforcement Learning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"math.OC","authors_text":"Justin Romberg, Siva Theja Maguluri, Thinh T. Doan","submitted_at":"2019-02-20T03:52:47Z","abstract_excerpt":"We study the policy evaluation problem in multi-agent reinforcement learning. In this problem, a group of agents works cooperatively to evaluate the value function for the global discounted accumulative reward problem, which is composed of local rewards observed by the agents. Over a series of time steps, the agents act, get rewarded, update their local estimate of the value function, then communicate with their neighbors. The local update at each agent can be interpreted as a distributed consensus-based variant of the popular temporal difference learning algorithm TD(0).\n  While distributed r"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1902.07393","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1902.07393","created_at":"2026-05-17T23:44:29.315651+00:00"},{"alias_kind":"arxiv_version","alias_value":"1902.07393v2","created_at":"2026-05-17T23:44:29.315651+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1902.07393","created_at":"2026-05-17T23:44:29.315651+00:00"},{"alias_kind":"pith_short_12","alias_value":"O7FOWDAIU7RK","created_at":"2026-05-18T12:33:24.271573+00:00"},{"alias_kind":"pith_short_16","alias_value":"O7FOWDAIU7RKDBQF","created_at":"2026-05-18T12:33:24.271573+00:00"},{"alias_kind":"pith_short_8","alias_value":"O7FOWDAI","created_at":"2026-05-18T12:33:24.271573+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":1,"internal_anchor_count":1,"sample":[{"citing_arxiv_id":"1907.03053","citing_title":"A Communication-Efficient Multi-Agent Actor-Critic Algorithm for Distributed Reinforcement Learning","ref_index":12,"is_internal_anchor":true}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/O7FOWDAIU7RKDBQFT27JUNZG57","json":"https://pith.science/pith/O7FOWDAIU7RKDBQFT27JUNZG57.json","graph_json":"https://pith.science/api/pith-number/O7FOWDAIU7RKDBQFT27JUNZG57/graph.json","events_json":"https://pith.science/api/pith-number/O7FOWDAIU7RKDBQFT27JUNZG57/events.json","paper":"https://pith.science/paper/O7FOWDAI"},"agent_actions":{"view_html":"https://pith.science/pith/O7FOWDAIU7RKDBQFT27JUNZG57","download_json":"https://pith.science/pith/O7FOWDAIU7RKDBQFT27JUNZG57.json","view_paper":"https://pith.science/paper/O7FOWDAI","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1902.07393&json=true","fetch_graph":"https://pith.science/api/pith-number/O7FOWDAIU7RKDBQFT27JUNZG57/graph.json","fetch_events":"https://pith.science/api/pith-number/O7FOWDAIU7RKDBQFT27JUNZG57/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/O7FOWDAIU7RKDBQFT27JUNZG57/action/timestamp_anchor","attest_storage":"https://pith.science/pith/O7FOWDAIU7RKDBQFT27JUNZG57/action/storage_attestation","attest_author":"https://pith.science/pith/O7FOWDAIU7RKDBQFT27JUNZG57/action/author_attestation","sign_citation":"https://pith.science/pith/O7FOWDAIU7RKDBQFT27JUNZG57/action/citation_signature","submit_replication":"https://pith.science/pith/O7FOWDAIU7RKDBQFT27JUNZG57/action/replication_record"}},"created_at":"2026-05-17T23:44:29.315651+00:00","updated_at":"2026-05-17T23:44:29.315651+00:00"}