{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2017:Z66GRHOAG6SHC76J64GBIQLHAQ","short_pith_number":"pith:Z66GRHOA","canonical_record":{"source":{"id":"1709.00503","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"stat.ML","submitted_at":"2017-09-01T22:53:03Z","cross_cats_sorted":["cs.AI","cs.LG"],"title_canon_sha256":"e68a34fb080ef6882c8700e5d9b677b515db42b4bb76112ff7108061fa83a2f8","abstract_canon_sha256":"be54db28c4077ccd8d240190f1221aa2c18dc61748f62e501a191b25eb784d6e"},"schema_version":"1.0"},"canonical_sha256":"cfbc689dc037a4717fc9f70c1441670422410e7ad337d6228347243334a87392","source":{"kind":"arxiv","id":"1709.00503","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1709.00503","created_at":"2026-05-18T00:15:10Z"},{"alias_kind":"arxiv_version","alias_value":"1709.00503v2","created_at":"2026-05-18T00:15:10Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1709.00503","created_at":"2026-05-18T00:15:10Z"},{"alias_kind":"pith_short_12","alias_value":"Z66GRHOAG6SH","created_at":"2026-05-18T12:31:59Z"},{"alias_kind":"pith_short_16","alias_value":"Z66GRHOAG6SHC76J","created_at":"2026-05-18T12:31:59Z"},{"alias_kind":"pith_short_8","alias_value":"Z66GRHOA","created_at":"2026-05-18T12:31:59Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2017:Z66GRHOAG6SHC76J64GBIQLHAQ","target":"record","payload":{"canonical_record":{"source":{"id":"1709.00503","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"stat.ML","submitted_at":"2017-09-01T22:53:03Z","cross_cats_sorted":["cs.AI","cs.LG"],"title_canon_sha256":"e68a34fb080ef6882c8700e5d9b677b515db42b4bb76112ff7108061fa83a2f8","abstract_canon_sha256":"be54db28c4077ccd8d240190f1221aa2c18dc61748f62e501a191b25eb784d6e"},"schema_version":"1.0"},"canonical_sha256":"cfbc689dc037a4717fc9f70c1441670422410e7ad337d6228347243334a87392","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:15:10.223189Z","signature_b64":"GGZjp9DDMLQKMkC3eAl6dK1aiweO1ow6U7Y4q9oS75L4WRwLQ79wj0+yhQnBZbL9A1NWE6MagJYyn5sN1fcuAA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"cfbc689dc037a4717fc9f70c1441670422410e7ad337d6228347243334a87392","last_reissued_at":"2026-05-18T00:15:10.222498Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:15:10.222498Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1709.00503","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:15:10Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"KutWZH2g+2MJ+9UD3KZtYiLFJlbLF9n735JmD187mYJ+qVAQI6qC6Pbvd42j8tP+3jCUnLFGINI917pBF8TUAQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-29T21:07:29.631248Z"},"content_sha256":"23e29f0acd08fdcc7cf9acc011a03f119bb8bd00b42e08018fb030d4b466cc64","schema_version":"1.0","event_id":"sha256:23e29f0acd08fdcc7cf9acc011a03f119bb8bd00b42e08018fb030d4b466cc64"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2017:Z66GRHOAG6SHC76J64GBIQLHAQ","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Mean Actor Critic","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","cs.LG"],"primary_cat":"stat.ML","authors_text":"Abdel-rahman Mohamed, Cameron Allen, George Konidaris, Kavosh Asadi, Melrose Roderick, Michael Littman","submitted_at":"2017-09-01T22:53:03Z","abstract_excerpt":"We propose a new algorithm, Mean Actor-Critic (MAC), for discrete-action continuous-state reinforcement learning. MAC is a policy gradient algorithm that uses the agent's explicit representation of all action values to estimate the gradient of the policy, rather than using only the actions that were actually executed. We prove that this approach reduces variance in the policy gradient estimate relative to traditional actor-critic methods. We show empirical results on two control domains and on six Atari games, where MAC is competitive with state-of-the-art policy search algorithms."},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1709.00503","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:15:10Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"BHDANfumNoZPngU2kLKdVaFfoaxW1Svly8c5Ay1nnc/XRJjSFpLrGPnYx2cxOlBgSRJ8LbQtBvqyPkJdv5q6Ag==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-29T21:07:29.631922Z"},"content_sha256":"eb0235316478383d5d5b55829fe06c187f814e9b8d42d09ff6f5cc27acc7c925","schema_version":"1.0","event_id":"sha256:eb0235316478383d5d5b55829fe06c187f814e9b8d42d09ff6f5cc27acc7c925"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/Z66GRHOAG6SHC76J64GBIQLHAQ/bundle.json","state_url":"https://pith.science/pith/Z66GRHOAG6SHC76J64GBIQLHAQ/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/Z66GRHOAG6SHC76J64GBIQLHAQ/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-29T21:07:29Z","links":{"resolver":"https://pith.science/pith/Z66GRHOAG6SHC76J64GBIQLHAQ","bundle":"https://pith.science/pith/Z66GRHOAG6SHC76J64GBIQLHAQ/bundle.json","state":"https://pith.science/pith/Z66GRHOAG6SHC76J64GBIQLHAQ/state.json","well_known_bundle":"https://pith.science/.well-known/pith/Z66GRHOAG6SHC76J64GBIQLHAQ/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2017:Z66GRHOAG6SHC76J64GBIQLHAQ","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"be54db28c4077ccd8d240190f1221aa2c18dc61748f62e501a191b25eb784d6e","cross_cats_sorted":["cs.AI","cs.LG"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"stat.ML","submitted_at":"2017-09-01T22:53:03Z","title_canon_sha256":"e68a34fb080ef6882c8700e5d9b677b515db42b4bb76112ff7108061fa83a2f8"},"schema_version":"1.0","source":{"id":"1709.00503","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1709.00503","created_at":"2026-05-18T00:15:10Z"},{"alias_kind":"arxiv_version","alias_value":"1709.00503v2","created_at":"2026-05-18T00:15:10Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1709.00503","created_at":"2026-05-18T00:15:10Z"},{"alias_kind":"pith_short_12","alias_value":"Z66GRHOAG6SH","created_at":"2026-05-18T12:31:59Z"},{"alias_kind":"pith_short_16","alias_value":"Z66GRHOAG6SHC76J","created_at":"2026-05-18T12:31:59Z"},{"alias_kind":"pith_short_8","alias_value":"Z66GRHOA","created_at":"2026-05-18T12:31:59Z"}],"graph_snapshots":[{"event_id":"sha256:eb0235316478383d5d5b55829fe06c187f814e9b8d42d09ff6f5cc27acc7c925","target":"graph","created_at":"2026-05-18T00:15:10Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"We propose a new algorithm, Mean Actor-Critic (MAC), for discrete-action continuous-state reinforcement learning. MAC is a policy gradient algorithm that uses the agent's explicit representation of all action values to estimate the gradient of the policy, rather than using only the actions that were actually executed. We prove that this approach reduces variance in the policy gradient estimate relative to traditional actor-critic methods. We show empirical results on two control domains and on six Atari games, where MAC is competitive with state-of-the-art policy search algorithms.","authors_text":"Abdel-rahman Mohamed, Cameron Allen, George Konidaris, Kavosh Asadi, Melrose Roderick, Michael Littman","cross_cats":["cs.AI","cs.LG"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"stat.ML","submitted_at":"2017-09-01T22:53:03Z","title":"Mean Actor Critic"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1709.00503","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:23e29f0acd08fdcc7cf9acc011a03f119bb8bd00b42e08018fb030d4b466cc64","target":"record","created_at":"2026-05-18T00:15:10Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"be54db28c4077ccd8d240190f1221aa2c18dc61748f62e501a191b25eb784d6e","cross_cats_sorted":["cs.AI","cs.LG"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"stat.ML","submitted_at":"2017-09-01T22:53:03Z","title_canon_sha256":"e68a34fb080ef6882c8700e5d9b677b515db42b4bb76112ff7108061fa83a2f8"},"schema_version":"1.0","source":{"id":"1709.00503","kind":"arxiv","version":2}},"canonical_sha256":"cfbc689dc037a4717fc9f70c1441670422410e7ad337d6228347243334a87392","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"cfbc689dc037a4717fc9f70c1441670422410e7ad337d6228347243334a87392","first_computed_at":"2026-05-18T00:15:10.222498Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:15:10.222498Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"GGZjp9DDMLQKMkC3eAl6dK1aiweO1ow6U7Y4q9oS75L4WRwLQ79wj0+yhQnBZbL9A1NWE6MagJYyn5sN1fcuAA==","signature_status":"signed_v1","signed_at":"2026-05-18T00:15:10.223189Z","signed_message":"canonical_sha256_bytes"},"source_id":"1709.00503","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:23e29f0acd08fdcc7cf9acc011a03f119bb8bd00b42e08018fb030d4b466cc64","sha256:eb0235316478383d5d5b55829fe06c187f814e9b8d42d09ff6f5cc27acc7c925"],"state_sha256":"549e34f6a7a7e66334f3e84007ef0fb1407d6cfbb45012d207e5a94b3a85d243"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"I6Bm/QOQxGjH+SCcmS9n0voKy2zCrhyCIAohEiISL8GsPQ24y3UYgP19glOUIIDGB/RVyctLPRhoY0UhECU5BA==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-29T21:07:29.635134Z","bundle_sha256":"609a6e36442a2e8cfbec6ac6a2582567c179db0788f16db4deae1465cb97e4f4"}}