{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2019:7VDVBWG53QIRCODHPOXVWZ4AXX","short_pith_number":"pith:7VDVBWG5","canonical_record":{"source":{"id":"1906.10667","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-06-25T17:04:48Z","cross_cats_sorted":["cs.AI","stat.ML"],"title_canon_sha256":"526ff948978e6871cae92755af1a42446a03b0302335588e2aa2dff825796229","abstract_canon_sha256":"40f3f5a6edf5dec9f1bea61c808d06bf49cad340b78680680a1b951333545893"},"schema_version":"1.0"},"canonical_sha256":"fd4750d8dddc111138677baf5b6780bdd7a27da1a0d74bf61c207d7c5e9be4da","source":{"kind":"arxiv","id":"1906.10667","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1906.10667","created_at":"2026-05-17T23:42:15Z"},{"alias_kind":"arxiv_version","alias_value":"1906.10667v1","created_at":"2026-05-17T23:42:15Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1906.10667","created_at":"2026-05-17T23:42:15Z"},{"alias_kind":"pith_short_12","alias_value":"7VDVBWG53QIR","created_at":"2026-05-18T12:33:12Z"},{"alias_kind":"pith_short_16","alias_value":"7VDVBWG53QIRCODH","created_at":"2026-05-18T12:33:12Z"},{"alias_kind":"pith_short_8","alias_value":"7VDVBWG5","created_at":"2026-05-18T12:33:12Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2019:7VDVBWG53QIRCODHPOXVWZ4AXX","target":"record","payload":{"canonical_record":{"source":{"id":"1906.10667","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-06-25T17:04:48Z","cross_cats_sorted":["cs.AI","stat.ML"],"title_canon_sha256":"526ff948978e6871cae92755af1a42446a03b0302335588e2aa2dff825796229","abstract_canon_sha256":"40f3f5a6edf5dec9f1bea61c808d06bf49cad340b78680680a1b951333545893"},"schema_version":"1.0"},"canonical_sha256":"fd4750d8dddc111138677baf5b6780bdd7a27da1a0d74bf61c207d7c5e9be4da","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:42:15.652675Z","signature_b64":"0YaAUKsLz3QJA42LuRjNaY+b6qr6igWqgPl+roles0H0BtO88bMgfNy85agIxYPzee/EGmqzg42Y+KXa6C28DA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"fd4750d8dddc111138677baf5b6780bdd7a27da1a0d74bf61c207d7c5e9be4da","last_reissued_at":"2026-05-17T23:42:15.652014Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:42:15.652014Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1906.10667","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:42:15Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"YywxdYsaQTtDpB0ta9YZfPJqQVV63d8+EOr081m/Mu3Jf8B4E9fk1ivtF/FIqi/E75TcyevnSHUbbOKKt7nIDg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-26T13:00:27.400164Z"},"content_sha256":"b4dd6e52eae8c9d7c10c5e118a763acd022c1e1ba20ad1a1c670c1dba8242ebb","schema_version":"1.0","event_id":"sha256:b4dd6e52eae8c9d7c10c5e118a763acd022c1e1ba20ad1a1c670c1dba8242ebb"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2019:7VDVBWG53QIRCODHPOXVWZ4AXX","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Reinforcement Learning with Competitive Ensembles of Information-Constrained Primitives","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","stat.ML"],"primary_cat":"cs.LG","authors_text":"Anirudh Goyal, Jonathan Binas, Sergey Levine, Shagun Sodhani, Xue Bin Peng, Yoshua Bengio","submitted_at":"2019-06-25T17:04:48Z","abstract_excerpt":"Reinforcement learning agents that operate in diverse and complex environments can benefit from the structured decomposition of their behavior. Often, this is addressed in the context of hierarchical reinforcement learning, where the aim is to decompose a policy into lower-level primitives or options, and a higher-level meta-policy that triggers the appropriate behaviors for a given situation. However, the meta-policy must still produce appropriate decisions in all states. In this work, we propose a policy design that decomposes into primitives, similarly to hierarchical reinforcement learning"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1906.10667","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:42:15Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"khE8JJ0TVTxoR5HNatZ9udpyzgtr6fDAnOP7O+d32UbRkFnuQpTB7i2QFsccCfBo0YOpgwiELMfBlYI2SWgdAA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-26T13:00:27.400871Z"},"content_sha256":"a70dd04a4f1f5188df5f01b25dfa3bbf9bdd2af74b9054cc4379e73120bc4124","schema_version":"1.0","event_id":"sha256:a70dd04a4f1f5188df5f01b25dfa3bbf9bdd2af74b9054cc4379e73120bc4124"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/7VDVBWG53QIRCODHPOXVWZ4AXX/bundle.json","state_url":"https://pith.science/pith/7VDVBWG53QIRCODHPOXVWZ4AXX/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/7VDVBWG53QIRCODHPOXVWZ4AXX/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-26T13:00:27Z","links":{"resolver":"https://pith.science/pith/7VDVBWG53QIRCODHPOXVWZ4AXX","bundle":"https://pith.science/pith/7VDVBWG53QIRCODHPOXVWZ4AXX/bundle.json","state":"https://pith.science/pith/7VDVBWG53QIRCODHPOXVWZ4AXX/state.json","well_known_bundle":"https://pith.science/.well-known/pith/7VDVBWG53QIRCODHPOXVWZ4AXX/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2019:7VDVBWG53QIRCODHPOXVWZ4AXX","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"40f3f5a6edf5dec9f1bea61c808d06bf49cad340b78680680a1b951333545893","cross_cats_sorted":["cs.AI","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-06-25T17:04:48Z","title_canon_sha256":"526ff948978e6871cae92755af1a42446a03b0302335588e2aa2dff825796229"},"schema_version":"1.0","source":{"id":"1906.10667","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1906.10667","created_at":"2026-05-17T23:42:15Z"},{"alias_kind":"arxiv_version","alias_value":"1906.10667v1","created_at":"2026-05-17T23:42:15Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1906.10667","created_at":"2026-05-17T23:42:15Z"},{"alias_kind":"pith_short_12","alias_value":"7VDVBWG53QIR","created_at":"2026-05-18T12:33:12Z"},{"alias_kind":"pith_short_16","alias_value":"7VDVBWG53QIRCODH","created_at":"2026-05-18T12:33:12Z"},{"alias_kind":"pith_short_8","alias_value":"7VDVBWG5","created_at":"2026-05-18T12:33:12Z"}],"graph_snapshots":[{"event_id":"sha256:a70dd04a4f1f5188df5f01b25dfa3bbf9bdd2af74b9054cc4379e73120bc4124","target":"graph","created_at":"2026-05-17T23:42:15Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Reinforcement learning agents that operate in diverse and complex environments can benefit from the structured decomposition of their behavior. Often, this is addressed in the context of hierarchical reinforcement learning, where the aim is to decompose a policy into lower-level primitives or options, and a higher-level meta-policy that triggers the appropriate behaviors for a given situation. However, the meta-policy must still produce appropriate decisions in all states. In this work, we propose a policy design that decomposes into primitives, similarly to hierarchical reinforcement learning","authors_text":"Anirudh Goyal, Jonathan Binas, Sergey Levine, Shagun Sodhani, Xue Bin Peng, Yoshua Bengio","cross_cats":["cs.AI","stat.ML"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-06-25T17:04:48Z","title":"Reinforcement Learning with Competitive Ensembles of Information-Constrained Primitives"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1906.10667","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:b4dd6e52eae8c9d7c10c5e118a763acd022c1e1ba20ad1a1c670c1dba8242ebb","target":"record","created_at":"2026-05-17T23:42:15Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"40f3f5a6edf5dec9f1bea61c808d06bf49cad340b78680680a1b951333545893","cross_cats_sorted":["cs.AI","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-06-25T17:04:48Z","title_canon_sha256":"526ff948978e6871cae92755af1a42446a03b0302335588e2aa2dff825796229"},"schema_version":"1.0","source":{"id":"1906.10667","kind":"arxiv","version":1}},"canonical_sha256":"fd4750d8dddc111138677baf5b6780bdd7a27da1a0d74bf61c207d7c5e9be4da","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"fd4750d8dddc111138677baf5b6780bdd7a27da1a0d74bf61c207d7c5e9be4da","first_computed_at":"2026-05-17T23:42:15.652014Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:42:15.652014Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"0YaAUKsLz3QJA42LuRjNaY+b6qr6igWqgPl+roles0H0BtO88bMgfNy85agIxYPzee/EGmqzg42Y+KXa6C28DA==","signature_status":"signed_v1","signed_at":"2026-05-17T23:42:15.652675Z","signed_message":"canonical_sha256_bytes"},"source_id":"1906.10667","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:b4dd6e52eae8c9d7c10c5e118a763acd022c1e1ba20ad1a1c670c1dba8242ebb","sha256:a70dd04a4f1f5188df5f01b25dfa3bbf9bdd2af74b9054cc4379e73120bc4124"],"state_sha256":"cbdf317eea02362d59484e75579d5541bcdd3502a8b4f26f0da54af969660257"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"PLZj7zoJ4JySKQHHjNphP/TtZPNj2KABZtCRleAnWvpSfkDTSALvBx687rSlC8SQD+Kl9ae3O/TDyI5Sw6iPAg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-26T13:00:27.404706Z","bundle_sha256":"fd7a657ea953e393cf76aecccb3c9e8d16802ad6907932d72f3f2a981ede4aea"}}