{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2019:O7FOWDAIU7RKDBQFT27JUNZG57","short_pith_number":"pith:O7FOWDAI","canonical_record":{"source":{"id":"1902.07393","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"math.OC","submitted_at":"2019-02-20T03:52:47Z","cross_cats_sorted":[],"title_canon_sha256":"6471a9b16f5c99ca74352020f883cb51523c1d165d89de36da43503c743e1f95","abstract_canon_sha256":"0b586daa2295c56cd91cce549079eef5798bdf2c8064298b9dd308563bbad0ff"},"schema_version":"1.0"},"canonical_sha256":"77caeb0c08a7e2a186059ebe9a3726efe6d17c59095c8855bfe0c0e55f2a604b","source":{"kind":"arxiv","id":"1902.07393","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1902.07393","created_at":"2026-05-17T23:44:29Z"},{"alias_kind":"arxiv_version","alias_value":"1902.07393v2","created_at":"2026-05-17T23:44:29Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1902.07393","created_at":"2026-05-17T23:44:29Z"},{"alias_kind":"pith_short_12","alias_value":"O7FOWDAIU7RK","created_at":"2026-05-18T12:33:24Z"},{"alias_kind":"pith_short_16","alias_value":"O7FOWDAIU7RKDBQF","created_at":"2026-05-18T12:33:24Z"},{"alias_kind":"pith_short_8","alias_value":"O7FOWDAI","created_at":"2026-05-18T12:33:24Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2019:O7FOWDAIU7RKDBQFT27JUNZG57","target":"record","payload":{"canonical_record":{"source":{"id":"1902.07393","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"math.OC","submitted_at":"2019-02-20T03:52:47Z","cross_cats_sorted":[],"title_canon_sha256":"6471a9b16f5c99ca74352020f883cb51523c1d165d89de36da43503c743e1f95","abstract_canon_sha256":"0b586daa2295c56cd91cce549079eef5798bdf2c8064298b9dd308563bbad0ff"},"schema_version":"1.0"},"canonical_sha256":"77caeb0c08a7e2a186059ebe9a3726efe6d17c59095c8855bfe0c0e55f2a604b","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:44:29.316239Z","signature_b64":"ebimlpB1VFLnOq6NmwUj3DpvsZD7bpI8hfZv6GBdj53SwBso34Sdc3yb5n7inZszAyLGeZmkIn6mitsW7aN+Aw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"77caeb0c08a7e2a186059ebe9a3726efe6d17c59095c8855bfe0c0e55f2a604b","last_reissued_at":"2026-05-17T23:44:29.315551Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:44:29.315551Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1902.07393","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:44:29Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"UefzKC5rDDn8BDgOM1nCzkony2TnWiHvgB/mTJfnIf+yRQ3PxB66q4s32iNbeB6j6APGcUhAdhJ70OncjOctDQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-26T01:23:15.445144Z"},"content_sha256":"e7985272b72aad33dc03eb36345b748820d1e049d2174eda9f8bbc64f4568338","schema_version":"1.0","event_id":"sha256:e7985272b72aad33dc03eb36345b748820d1e049d2174eda9f8bbc64f4568338"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2019:O7FOWDAIU7RKDBQFT27JUNZG57","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Finite-Time Analysis of Distributed TD(0) with Linear Function Approximation for Multi-Agent Reinforcement Learning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"math.OC","authors_text":"Justin Romberg, Siva Theja Maguluri, Thinh T. Doan","submitted_at":"2019-02-20T03:52:47Z","abstract_excerpt":"We study the policy evaluation problem in multi-agent reinforcement learning. In this problem, a group of agents works cooperatively to evaluate the value function for the global discounted accumulative reward problem, which is composed of local rewards observed by the agents. Over a series of time steps, the agents act, get rewarded, update their local estimate of the value function, then communicate with their neighbors. The local update at each agent can be interpreted as a distributed consensus-based variant of the popular temporal difference learning algorithm TD(0).\n  While distributed r"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1902.07393","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:44:29Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"gcqQJxeNU6bQxp3LxtNbzPXZvoOEi+8O/6VFeISrs7JiS/516BO7h14vTeyf0zGXYeg2qEGFEBXSO96INxsbDg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-26T01:23:15.445848Z"},"content_sha256":"7c3e3709c52c8410b300603141cc7553258f8d6268ee6c1d06324f7874a2f9f9","schema_version":"1.0","event_id":"sha256:7c3e3709c52c8410b300603141cc7553258f8d6268ee6c1d06324f7874a2f9f9"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/O7FOWDAIU7RKDBQFT27JUNZG57/bundle.json","state_url":"https://pith.science/pith/O7FOWDAIU7RKDBQFT27JUNZG57/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/O7FOWDAIU7RKDBQFT27JUNZG57/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-26T01:23:15Z","links":{"resolver":"https://pith.science/pith/O7FOWDAIU7RKDBQFT27JUNZG57","bundle":"https://pith.science/pith/O7FOWDAIU7RKDBQFT27JUNZG57/bundle.json","state":"https://pith.science/pith/O7FOWDAIU7RKDBQFT27JUNZG57/state.json","well_known_bundle":"https://pith.science/.well-known/pith/O7FOWDAIU7RKDBQFT27JUNZG57/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2019:O7FOWDAIU7RKDBQFT27JUNZG57","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"0b586daa2295c56cd91cce549079eef5798bdf2c8064298b9dd308563bbad0ff","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"math.OC","submitted_at":"2019-02-20T03:52:47Z","title_canon_sha256":"6471a9b16f5c99ca74352020f883cb51523c1d165d89de36da43503c743e1f95"},"schema_version":"1.0","source":{"id":"1902.07393","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1902.07393","created_at":"2026-05-17T23:44:29Z"},{"alias_kind":"arxiv_version","alias_value":"1902.07393v2","created_at":"2026-05-17T23:44:29Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1902.07393","created_at":"2026-05-17T23:44:29Z"},{"alias_kind":"pith_short_12","alias_value":"O7FOWDAIU7RK","created_at":"2026-05-18T12:33:24Z"},{"alias_kind":"pith_short_16","alias_value":"O7FOWDAIU7RKDBQF","created_at":"2026-05-18T12:33:24Z"},{"alias_kind":"pith_short_8","alias_value":"O7FOWDAI","created_at":"2026-05-18T12:33:24Z"}],"graph_snapshots":[{"event_id":"sha256:7c3e3709c52c8410b300603141cc7553258f8d6268ee6c1d06324f7874a2f9f9","target":"graph","created_at":"2026-05-17T23:44:29Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"We study the policy evaluation problem in multi-agent reinforcement learning. In this problem, a group of agents works cooperatively to evaluate the value function for the global discounted accumulative reward problem, which is composed of local rewards observed by the agents. Over a series of time steps, the agents act, get rewarded, update their local estimate of the value function, then communicate with their neighbors. The local update at each agent can be interpreted as a distributed consensus-based variant of the popular temporal difference learning algorithm TD(0).\n  While distributed r","authors_text":"Justin Romberg, Siva Theja Maguluri, Thinh T. Doan","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"math.OC","submitted_at":"2019-02-20T03:52:47Z","title":"Finite-Time Analysis of Distributed TD(0) with Linear Function Approximation for Multi-Agent Reinforcement Learning"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1902.07393","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:e7985272b72aad33dc03eb36345b748820d1e049d2174eda9f8bbc64f4568338","target":"record","created_at":"2026-05-17T23:44:29Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"0b586daa2295c56cd91cce549079eef5798bdf2c8064298b9dd308563bbad0ff","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"math.OC","submitted_at":"2019-02-20T03:52:47Z","title_canon_sha256":"6471a9b16f5c99ca74352020f883cb51523c1d165d89de36da43503c743e1f95"},"schema_version":"1.0","source":{"id":"1902.07393","kind":"arxiv","version":2}},"canonical_sha256":"77caeb0c08a7e2a186059ebe9a3726efe6d17c59095c8855bfe0c0e55f2a604b","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"77caeb0c08a7e2a186059ebe9a3726efe6d17c59095c8855bfe0c0e55f2a604b","first_computed_at":"2026-05-17T23:44:29.315551Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:44:29.315551Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"ebimlpB1VFLnOq6NmwUj3DpvsZD7bpI8hfZv6GBdj53SwBso34Sdc3yb5n7inZszAyLGeZmkIn6mitsW7aN+Aw==","signature_status":"signed_v1","signed_at":"2026-05-17T23:44:29.316239Z","signed_message":"canonical_sha256_bytes"},"source_id":"1902.07393","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:e7985272b72aad33dc03eb36345b748820d1e049d2174eda9f8bbc64f4568338","sha256:7c3e3709c52c8410b300603141cc7553258f8d6268ee6c1d06324f7874a2f9f9"],"state_sha256":"aab2a67cec10421a7de184551776881e39399ed38f80ffec912b0c3713a65266"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"P0HACglX4YuqFiP4yoRjkDR8GvKxJhba3O9WBreH7VgXUPBIWCYgHRGdNQPqegN0HEqlxtTj5VDRGKFeKb1YCA==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-26T01:23:15.449619Z","bundle_sha256":"4ef5ee2c1a483d739eddf49b482e2542b2526292605e78d1cb5d0cdd46f05287"}}