{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2017:DCVKGGOD65DUKCQ4VXTEUKTWWG","short_pith_number":"pith:DCVKGGOD","canonical_record":{"source":{"id":"1711.09874","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2017-11-27T18:46:00Z","cross_cats_sorted":["cs.RO"],"title_canon_sha256":"318594fefb17aa06e34875668a5c7ca13e661b08181a538e0495e1c7343e8e23","abstract_canon_sha256":"ab3e17a35f6aa007277fde459bee02a410e02ef5bb39da32404feae80a3eab0c"},"schema_version":"1.0"},"canonical_sha256":"18aaa319c3f747450a1cade64a2a76b1ba80f24eddcc5ff27c247cf526f84ea2","source":{"kind":"arxiv","id":"1711.09874","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1711.09874","created_at":"2026-05-18T00:17:24Z"},{"alias_kind":"arxiv_version","alias_value":"1711.09874v2","created_at":"2026-05-18T00:17:24Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1711.09874","created_at":"2026-05-18T00:17:24Z"},{"alias_kind":"pith_short_12","alias_value":"DCVKGGOD65DU","created_at":"2026-05-18T12:31:10Z"},{"alias_kind":"pith_short_16","alias_value":"DCVKGGOD65DUKCQ4","created_at":"2026-05-18T12:31:10Z"},{"alias_kind":"pith_short_8","alias_value":"DCVKGGOD","created_at":"2026-05-18T12:31:10Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2017:DCVKGGOD65DUKCQ4VXTEUKTWWG","target":"record","payload":{"canonical_record":{"source":{"id":"1711.09874","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2017-11-27T18:46:00Z","cross_cats_sorted":["cs.RO"],"title_canon_sha256":"318594fefb17aa06e34875668a5c7ca13e661b08181a538e0495e1c7343e8e23","abstract_canon_sha256":"ab3e17a35f6aa007277fde459bee02a410e02ef5bb39da32404feae80a3eab0c"},"schema_version":"1.0"},"canonical_sha256":"18aaa319c3f747450a1cade64a2a76b1ba80f24eddcc5ff27c247cf526f84ea2","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:17:24.015460Z","signature_b64":"ulbBVUY1y4nGZtwXchNlNrOWCSk38WViTCLD98yq10LaE7ctH5F6Aj69+0VN39ohYYx9C9OspXGFia5DzoqnBg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"18aaa319c3f747450a1cade64a2a76b1ba80f24eddcc5ff27c247cf526f84ea2","last_reissued_at":"2026-05-18T00:17:24.014996Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:17:24.014996Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1711.09874","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:17:24Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"An/kYpVpeb4QJMMU0191BtmGDqrtJVB8Kwb0h8uCQDUTQjgF4Z+1mxb5Qw1ONRiuSbaZX2aibfEV0Cu6/Ec5CA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-29T15:01:18.272112Z"},"content_sha256":"cbfd48b70c3b16b7d5d8b8a5aec98525d411b97ab968ae11a9259d42cd2c4f68","schema_version":"1.0","event_id":"sha256:cbfd48b70c3b16b7d5d8b8a5aec98525d411b97ab968ae11a9259d42cd2c4f68"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2017:DCVKGGOD65DUKCQ4VXTEUKTWWG","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Divide-and-Conquer Reinforcement Learning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.RO"],"primary_cat":"cs.LG","authors_text":"Aravind Rajeswaran, Avi Singh, Dibya Ghosh, Sergey Levine, Vikash Kumar","submitted_at":"2017-11-27T18:46:00Z","abstract_excerpt":"Standard model-free deep reinforcement learning (RL) algorithms sample a new initial state for each trial, allowing them to optimize policies that can perform well even in highly stochastic environments. However, problems that exhibit considerable initial state variation typically produce high-variance gradient estimates for model-free RL, making direct policy or value function optimization challenging. In this paper, we develop a novel algorithm that instead partitions the initial state space into \"slices\", and optimizes an ensemble of policies, each on a different slice. The ensemble is grad"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1711.09874","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:17:24Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"bn+6JXhjXYqiPvmEsE9XW3KkWc0MgQ1hcSUkLsQCn8lQUvX/mHxtFTvcpBd1spCLSDDVWsnWctA4SClqA2W6AA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-29T15:01:18.272829Z"},"content_sha256":"2e972fa6ef1cbfd982cffc2a28859d20f4a0be3e34d997db5b45bab33608c7e1","schema_version":"1.0","event_id":"sha256:2e972fa6ef1cbfd982cffc2a28859d20f4a0be3e34d997db5b45bab33608c7e1"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/DCVKGGOD65DUKCQ4VXTEUKTWWG/bundle.json","state_url":"https://pith.science/pith/DCVKGGOD65DUKCQ4VXTEUKTWWG/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/DCVKGGOD65DUKCQ4VXTEUKTWWG/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-29T15:01:18Z","links":{"resolver":"https://pith.science/pith/DCVKGGOD65DUKCQ4VXTEUKTWWG","bundle":"https://pith.science/pith/DCVKGGOD65DUKCQ4VXTEUKTWWG/bundle.json","state":"https://pith.science/pith/DCVKGGOD65DUKCQ4VXTEUKTWWG/state.json","well_known_bundle":"https://pith.science/.well-known/pith/DCVKGGOD65DUKCQ4VXTEUKTWWG/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2017:DCVKGGOD65DUKCQ4VXTEUKTWWG","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"ab3e17a35f6aa007277fde459bee02a410e02ef5bb39da32404feae80a3eab0c","cross_cats_sorted":["cs.RO"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2017-11-27T18:46:00Z","title_canon_sha256":"318594fefb17aa06e34875668a5c7ca13e661b08181a538e0495e1c7343e8e23"},"schema_version":"1.0","source":{"id":"1711.09874","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1711.09874","created_at":"2026-05-18T00:17:24Z"},{"alias_kind":"arxiv_version","alias_value":"1711.09874v2","created_at":"2026-05-18T00:17:24Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1711.09874","created_at":"2026-05-18T00:17:24Z"},{"alias_kind":"pith_short_12","alias_value":"DCVKGGOD65DU","created_at":"2026-05-18T12:31:10Z"},{"alias_kind":"pith_short_16","alias_value":"DCVKGGOD65DUKCQ4","created_at":"2026-05-18T12:31:10Z"},{"alias_kind":"pith_short_8","alias_value":"DCVKGGOD","created_at":"2026-05-18T12:31:10Z"}],"graph_snapshots":[{"event_id":"sha256:2e972fa6ef1cbfd982cffc2a28859d20f4a0be3e34d997db5b45bab33608c7e1","target":"graph","created_at":"2026-05-18T00:17:24Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Standard model-free deep reinforcement learning (RL) algorithms sample a new initial state for each trial, allowing them to optimize policies that can perform well even in highly stochastic environments. However, problems that exhibit considerable initial state variation typically produce high-variance gradient estimates for model-free RL, making direct policy or value function optimization challenging. In this paper, we develop a novel algorithm that instead partitions the initial state space into \"slices\", and optimizes an ensemble of policies, each on a different slice. The ensemble is grad","authors_text":"Aravind Rajeswaran, Avi Singh, Dibya Ghosh, Sergey Levine, Vikash Kumar","cross_cats":["cs.RO"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2017-11-27T18:46:00Z","title":"Divide-and-Conquer Reinforcement Learning"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1711.09874","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:cbfd48b70c3b16b7d5d8b8a5aec98525d411b97ab968ae11a9259d42cd2c4f68","target":"record","created_at":"2026-05-18T00:17:24Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"ab3e17a35f6aa007277fde459bee02a410e02ef5bb39da32404feae80a3eab0c","cross_cats_sorted":["cs.RO"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2017-11-27T18:46:00Z","title_canon_sha256":"318594fefb17aa06e34875668a5c7ca13e661b08181a538e0495e1c7343e8e23"},"schema_version":"1.0","source":{"id":"1711.09874","kind":"arxiv","version":2}},"canonical_sha256":"18aaa319c3f747450a1cade64a2a76b1ba80f24eddcc5ff27c247cf526f84ea2","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"18aaa319c3f747450a1cade64a2a76b1ba80f24eddcc5ff27c247cf526f84ea2","first_computed_at":"2026-05-18T00:17:24.014996Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:17:24.014996Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"ulbBVUY1y4nGZtwXchNlNrOWCSk38WViTCLD98yq10LaE7ctH5F6Aj69+0VN39ohYYx9C9OspXGFia5DzoqnBg==","signature_status":"signed_v1","signed_at":"2026-05-18T00:17:24.015460Z","signed_message":"canonical_sha256_bytes"},"source_id":"1711.09874","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:cbfd48b70c3b16b7d5d8b8a5aec98525d411b97ab968ae11a9259d42cd2c4f68","sha256:2e972fa6ef1cbfd982cffc2a28859d20f4a0be3e34d997db5b45bab33608c7e1"],"state_sha256":"4b2d99ebdfea397befa6ca8e83d0ab2245b1b2126e58a05b1aa79ec84da60a2b"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"yaLU39idP58GUm4iO9NPPaAol+kZzbsSKQ+CWxd0PJmYKF2fPwl4/Py2PQOg+SqGGrmkgY7Q+xVYG5PhSd3yBg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-29T15:01:18.276815Z","bundle_sha256":"f55fe4c83c3488d28b46ea1515b74f6486f2fc06a2df7e2ab2e0f16a7ac443fb"}}