{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2018:4MWB3EUFKGFSIKGLMBLE3Q2TCC","short_pith_number":"pith:4MWB3EUF","canonical_record":{"source":{"id":"1811.04324","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-11-10T23:35:34Z","cross_cats_sorted":["stat.ML"],"title_canon_sha256":"61cf76c181532a9b9b4e68f59d3f489279bcec7de9891c87b6f4efc1075e10c3","abstract_canon_sha256":"7478674c1ce3653f018b2d3064e803035fa26b61b77977cf745f867cdbbbe0ca"},"schema_version":"1.0"},"canonical_sha256":"e32c1d9285518b2428cb60564dc35310b36d51bc99faf3089ecf047ea38ca753","source":{"kind":"arxiv","id":"1811.04324","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1811.04324","created_at":"2026-05-18T00:00:46Z"},{"alias_kind":"arxiv_version","alias_value":"1811.04324v2","created_at":"2026-05-18T00:00:46Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1811.04324","created_at":"2026-05-18T00:00:46Z"},{"alias_kind":"pith_short_12","alias_value":"4MWB3EUFKGFS","created_at":"2026-05-18T12:32:05Z"},{"alias_kind":"pith_short_16","alias_value":"4MWB3EUFKGFSIKGL","created_at":"2026-05-18T12:32:05Z"},{"alias_kind":"pith_short_8","alias_value":"4MWB3EUF","created_at":"2026-05-18T12:32:05Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2018:4MWB3EUFKGFSIKGLMBLE3Q2TCC","target":"record","payload":{"canonical_record":{"source":{"id":"1811.04324","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-11-10T23:35:34Z","cross_cats_sorted":["stat.ML"],"title_canon_sha256":"61cf76c181532a9b9b4e68f59d3f489279bcec7de9891c87b6f4efc1075e10c3","abstract_canon_sha256":"7478674c1ce3653f018b2d3064e803035fa26b61b77977cf745f867cdbbbe0ca"},"schema_version":"1.0"},"canonical_sha256":"e32c1d9285518b2428cb60564dc35310b36d51bc99faf3089ecf047ea38ca753","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:00:46.832519Z","signature_b64":"5lJdXK9UpcOZi9T3nd79tncy3hzDVUKYX+2lKtXchi6Y7t+CzjJTbmPtl6P8c+/DL7EQyuhDdA2HdI0Yu2QoCQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"e32c1d9285518b2428cb60564dc35310b36d51bc99faf3089ecf047ea38ca753","last_reissued_at":"2026-05-18T00:00:46.832089Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:00:46.832089Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1811.04324","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:00:46Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"oKZh+5gfaPS6jKvjRzMn/Sl7NCOBxdz0IaOGr9ZinAGaX7wdLWe36KWFo0lHfcKmVnsA1jdwFBrchbpQmrsaCg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-31T18:08:47.488916Z"},"content_sha256":"d3edfe31852c055a36684ef62378c85f2f460d52a20e19b081e43d0398116fc2","schema_version":"1.0","event_id":"sha256:d3edfe31852c055a36684ef62378c85f2f460d52a20e19b081e43d0398116fc2"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2018:4MWB3EUFKGFSIKGLMBLE3Q2TCC","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Diversity-Driven Extensible Hierarchical Reinforcement Learning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["stat.ML"],"primary_cat":"cs.LG","authors_text":"Jianyi Wang, Mai Xu, Thomas Lukasiewicz, Yuhang Song, Zhenghua Xu","submitted_at":"2018-11-10T23:35:34Z","abstract_excerpt":"Hierarchical reinforcement learning (HRL) has recently shown promising advances on speeding up learning, improving the exploration, and discovering intertask transferable skills. Most recent works focus on HRL with two levels, i.e., a master policy manipulates subpolicies, which in turn manipulate primitive actions. However, HRL with multiple levels is usually needed in many real-world scenarios, whose ultimate goals are highly abstract, while their actions are very primitive. Therefore, in this paper, we propose a diversity-driven extensible HRL (DEHRL), where an extensible and scalable frame"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1811.04324","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:00:46Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"gZJXMXEzTSyTuFKmOKVTMydlqWa4ADhmUYyOvdmEhl7lRqA2AF5wqPUMbZhJ2hw+UbJiFl8v09Fz/Lf0k1D3Bw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-31T18:08:47.489297Z"},"content_sha256":"86fc12c5fb3262e625b6d4861ef0aa1a1cd0528d731d3f416e2494886f9a1a07","schema_version":"1.0","event_id":"sha256:86fc12c5fb3262e625b6d4861ef0aa1a1cd0528d731d3f416e2494886f9a1a07"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/4MWB3EUFKGFSIKGLMBLE3Q2TCC/bundle.json","state_url":"https://pith.science/pith/4MWB3EUFKGFSIKGLMBLE3Q2TCC/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/4MWB3EUFKGFSIKGLMBLE3Q2TCC/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-31T18:08:47Z","links":{"resolver":"https://pith.science/pith/4MWB3EUFKGFSIKGLMBLE3Q2TCC","bundle":"https://pith.science/pith/4MWB3EUFKGFSIKGLMBLE3Q2TCC/bundle.json","state":"https://pith.science/pith/4MWB3EUFKGFSIKGLMBLE3Q2TCC/state.json","well_known_bundle":"https://pith.science/.well-known/pith/4MWB3EUFKGFSIKGLMBLE3Q2TCC/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2018:4MWB3EUFKGFSIKGLMBLE3Q2TCC","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"7478674c1ce3653f018b2d3064e803035fa26b61b77977cf745f867cdbbbe0ca","cross_cats_sorted":["stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-11-10T23:35:34Z","title_canon_sha256":"61cf76c181532a9b9b4e68f59d3f489279bcec7de9891c87b6f4efc1075e10c3"},"schema_version":"1.0","source":{"id":"1811.04324","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1811.04324","created_at":"2026-05-18T00:00:46Z"},{"alias_kind":"arxiv_version","alias_value":"1811.04324v2","created_at":"2026-05-18T00:00:46Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1811.04324","created_at":"2026-05-18T00:00:46Z"},{"alias_kind":"pith_short_12","alias_value":"4MWB3EUFKGFS","created_at":"2026-05-18T12:32:05Z"},{"alias_kind":"pith_short_16","alias_value":"4MWB3EUFKGFSIKGL","created_at":"2026-05-18T12:32:05Z"},{"alias_kind":"pith_short_8","alias_value":"4MWB3EUF","created_at":"2026-05-18T12:32:05Z"}],"graph_snapshots":[{"event_id":"sha256:86fc12c5fb3262e625b6d4861ef0aa1a1cd0528d731d3f416e2494886f9a1a07","target":"graph","created_at":"2026-05-18T00:00:46Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Hierarchical reinforcement learning (HRL) has recently shown promising advances on speeding up learning, improving the exploration, and discovering intertask transferable skills. Most recent works focus on HRL with two levels, i.e., a master policy manipulates subpolicies, which in turn manipulate primitive actions. However, HRL with multiple levels is usually needed in many real-world scenarios, whose ultimate goals are highly abstract, while their actions are very primitive. Therefore, in this paper, we propose a diversity-driven extensible HRL (DEHRL), where an extensible and scalable frame","authors_text":"Jianyi Wang, Mai Xu, Thomas Lukasiewicz, Yuhang Song, Zhenghua Xu","cross_cats":["stat.ML"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-11-10T23:35:34Z","title":"Diversity-Driven Extensible Hierarchical Reinforcement Learning"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1811.04324","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:d3edfe31852c055a36684ef62378c85f2f460d52a20e19b081e43d0398116fc2","target":"record","created_at":"2026-05-18T00:00:46Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"7478674c1ce3653f018b2d3064e803035fa26b61b77977cf745f867cdbbbe0ca","cross_cats_sorted":["stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-11-10T23:35:34Z","title_canon_sha256":"61cf76c181532a9b9b4e68f59d3f489279bcec7de9891c87b6f4efc1075e10c3"},"schema_version":"1.0","source":{"id":"1811.04324","kind":"arxiv","version":2}},"canonical_sha256":"e32c1d9285518b2428cb60564dc35310b36d51bc99faf3089ecf047ea38ca753","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"e32c1d9285518b2428cb60564dc35310b36d51bc99faf3089ecf047ea38ca753","first_computed_at":"2026-05-18T00:00:46.832089Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:00:46.832089Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"5lJdXK9UpcOZi9T3nd79tncy3hzDVUKYX+2lKtXchi6Y7t+CzjJTbmPtl6P8c+/DL7EQyuhDdA2HdI0Yu2QoCQ==","signature_status":"signed_v1","signed_at":"2026-05-18T00:00:46.832519Z","signed_message":"canonical_sha256_bytes"},"source_id":"1811.04324","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:d3edfe31852c055a36684ef62378c85f2f460d52a20e19b081e43d0398116fc2","sha256:86fc12c5fb3262e625b6d4861ef0aa1a1cd0528d731d3f416e2494886f9a1a07"],"state_sha256":"d3c0c8c32c5de43b9bfbfa109cfb46252a1d6ed600425910ae602eae886fe5c1"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"mLBYWimkpL7L26/w7y9aLYmoxOricRWwaQ0Ll4UVouC3/y8x7XmH3VE2MIZauz/fsplT89wu4bcnc0L0eIc2Aw==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-31T18:08:47.492132Z","bundle_sha256":"91ef5522a8c81cfd70180d2da1cdb57310c47d51c8943a0129780ba35522b721"}}