{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2018:COFSBSGRGIBZB5UYICRWFUUVH2","short_pith_number":"pith:COFSBSGR","canonical_record":{"source":{"id":"1803.00590","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-03-01T19:12:27Z","cross_cats_sorted":["cs.AI","stat.ML"],"title_canon_sha256":"7273c7ff515675f0aa72269a21f5d84c06a896f7ee170a8e0d9ef671f6ef7f7d","abstract_canon_sha256":"356c6aaa6fb9b28fc785f6866a64429eb13d8637a5394387ca60813167029690"},"schema_version":"1.0"},"canonical_sha256":"138b20c8d1320390f69840a362d2953e82fb4e34e2598296f6982e895d0d643f","source":{"kind":"arxiv","id":"1803.00590","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1803.00590","created_at":"2026-05-18T00:13:44Z"},{"alias_kind":"arxiv_version","alias_value":"1803.00590v2","created_at":"2026-05-18T00:13:44Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1803.00590","created_at":"2026-05-18T00:13:44Z"},{"alias_kind":"pith_short_12","alias_value":"COFSBSGRGIBZ","created_at":"2026-05-18T12:32:16Z"},{"alias_kind":"pith_short_16","alias_value":"COFSBSGRGIBZB5UY","created_at":"2026-05-18T12:32:16Z"},{"alias_kind":"pith_short_8","alias_value":"COFSBSGR","created_at":"2026-05-18T12:32:16Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2018:COFSBSGRGIBZB5UYICRWFUUVH2","target":"record","payload":{"canonical_record":{"source":{"id":"1803.00590","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-03-01T19:12:27Z","cross_cats_sorted":["cs.AI","stat.ML"],"title_canon_sha256":"7273c7ff515675f0aa72269a21f5d84c06a896f7ee170a8e0d9ef671f6ef7f7d","abstract_canon_sha256":"356c6aaa6fb9b28fc785f6866a64429eb13d8637a5394387ca60813167029690"},"schema_version":"1.0"},"canonical_sha256":"138b20c8d1320390f69840a362d2953e82fb4e34e2598296f6982e895d0d643f","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:13:44.720522Z","signature_b64":"TM5Hkg1HyCCjMKWkclcUbd3BtTAN+A0A2adKWK9UldxLB4hnks/bXO7FC5d2EfYZAq3P5Sfg2cqUCjrwQvO9DQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"138b20c8d1320390f69840a362d2953e82fb4e34e2598296f6982e895d0d643f","last_reissued_at":"2026-05-18T00:13:44.719837Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:13:44.719837Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1803.00590","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:13:44Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"HwfQNBzByp7KQlHgtb0pHIf9ZLEiRfw0V8te0yfEYpHV71mkcsW0OB2CqoDKikivLW8Oaqr1gErIxmEuClPmAQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-29T21:11:55.463155Z"},"content_sha256":"13ed584a3f6d8fd2b3d298e96855233975917da50da8f42cc80867f1222e46e2","schema_version":"1.0","event_id":"sha256:13ed584a3f6d8fd2b3d298e96855233975917da50da8f42cc80867f1222e46e2"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2018:COFSBSGRGIBZB5UYICRWFUUVH2","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Hierarchical Imitation and Reinforcement Learning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","stat.ML"],"primary_cat":"cs.LG","authors_text":"Alekh Agarwal, Hal Daum\\'e III, Hoang M. Le, Miroslav Dud\\'ik, Nan Jiang, Yisong Yue","submitted_at":"2018-03-01T19:12:27Z","abstract_excerpt":"We study how to effectively leverage expert feedback to learn sequential decision-making policies. We focus on problems with sparse rewards and long time horizons, which typically pose significant challenges in reinforcement learning. We propose an algorithmic framework, called hierarchical guidance, that leverages the hierarchical structure of the underlying problem to integrate different modes of expert interaction. Our framework can incorporate different combinations of imitation learning (IL) and reinforcement learning (RL) at different levels, leading to dramatic reductions in both expert"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1803.00590","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:13:44Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"gWyFFGXzvhUfI6HAHD7Vlaq+JLX0sAd4ap8h5/AkzhvFrxqjNpxYDDrCps3HWgNP0twEcRsRwrxQvSEoOVYwBA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-29T21:11:55.463843Z"},"content_sha256":"a894d06f707487a1c25ff9108e6f58454f04f88c33d661e05600cd2f6027d5d0","schema_version":"1.0","event_id":"sha256:a894d06f707487a1c25ff9108e6f58454f04f88c33d661e05600cd2f6027d5d0"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/COFSBSGRGIBZB5UYICRWFUUVH2/bundle.json","state_url":"https://pith.science/pith/COFSBSGRGIBZB5UYICRWFUUVH2/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/COFSBSGRGIBZB5UYICRWFUUVH2/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-29T21:11:55Z","links":{"resolver":"https://pith.science/pith/COFSBSGRGIBZB5UYICRWFUUVH2","bundle":"https://pith.science/pith/COFSBSGRGIBZB5UYICRWFUUVH2/bundle.json","state":"https://pith.science/pith/COFSBSGRGIBZB5UYICRWFUUVH2/state.json","well_known_bundle":"https://pith.science/.well-known/pith/COFSBSGRGIBZB5UYICRWFUUVH2/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2018:COFSBSGRGIBZB5UYICRWFUUVH2","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"356c6aaa6fb9b28fc785f6866a64429eb13d8637a5394387ca60813167029690","cross_cats_sorted":["cs.AI","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-03-01T19:12:27Z","title_canon_sha256":"7273c7ff515675f0aa72269a21f5d84c06a896f7ee170a8e0d9ef671f6ef7f7d"},"schema_version":"1.0","source":{"id":"1803.00590","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1803.00590","created_at":"2026-05-18T00:13:44Z"},{"alias_kind":"arxiv_version","alias_value":"1803.00590v2","created_at":"2026-05-18T00:13:44Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1803.00590","created_at":"2026-05-18T00:13:44Z"},{"alias_kind":"pith_short_12","alias_value":"COFSBSGRGIBZ","created_at":"2026-05-18T12:32:16Z"},{"alias_kind":"pith_short_16","alias_value":"COFSBSGRGIBZB5UY","created_at":"2026-05-18T12:32:16Z"},{"alias_kind":"pith_short_8","alias_value":"COFSBSGR","created_at":"2026-05-18T12:32:16Z"}],"graph_snapshots":[{"event_id":"sha256:a894d06f707487a1c25ff9108e6f58454f04f88c33d661e05600cd2f6027d5d0","target":"graph","created_at":"2026-05-18T00:13:44Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"We study how to effectively leverage expert feedback to learn sequential decision-making policies. We focus on problems with sparse rewards and long time horizons, which typically pose significant challenges in reinforcement learning. We propose an algorithmic framework, called hierarchical guidance, that leverages the hierarchical structure of the underlying problem to integrate different modes of expert interaction. Our framework can incorporate different combinations of imitation learning (IL) and reinforcement learning (RL) at different levels, leading to dramatic reductions in both expert","authors_text":"Alekh Agarwal, Hal Daum\\'e III, Hoang M. Le, Miroslav Dud\\'ik, Nan Jiang, Yisong Yue","cross_cats":["cs.AI","stat.ML"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-03-01T19:12:27Z","title":"Hierarchical Imitation and Reinforcement Learning"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1803.00590","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:13ed584a3f6d8fd2b3d298e96855233975917da50da8f42cc80867f1222e46e2","target":"record","created_at":"2026-05-18T00:13:44Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"356c6aaa6fb9b28fc785f6866a64429eb13d8637a5394387ca60813167029690","cross_cats_sorted":["cs.AI","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-03-01T19:12:27Z","title_canon_sha256":"7273c7ff515675f0aa72269a21f5d84c06a896f7ee170a8e0d9ef671f6ef7f7d"},"schema_version":"1.0","source":{"id":"1803.00590","kind":"arxiv","version":2}},"canonical_sha256":"138b20c8d1320390f69840a362d2953e82fb4e34e2598296f6982e895d0d643f","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"138b20c8d1320390f69840a362d2953e82fb4e34e2598296f6982e895d0d643f","first_computed_at":"2026-05-18T00:13:44.719837Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:13:44.719837Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"TM5Hkg1HyCCjMKWkclcUbd3BtTAN+A0A2adKWK9UldxLB4hnks/bXO7FC5d2EfYZAq3P5Sfg2cqUCjrwQvO9DQ==","signature_status":"signed_v1","signed_at":"2026-05-18T00:13:44.720522Z","signed_message":"canonical_sha256_bytes"},"source_id":"1803.00590","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:13ed584a3f6d8fd2b3d298e96855233975917da50da8f42cc80867f1222e46e2","sha256:a894d06f707487a1c25ff9108e6f58454f04f88c33d661e05600cd2f6027d5d0"],"state_sha256":"167aa187a877a57d19e247d6dcfa9971a5c43c29cc63da8544b150bd1d9d0645"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"v7DCgwogUgmAKCefuy6T+UMjDBpDCkFo2gcnKvxPAK02UFhTv06qQwfgtOjkhqQ1ThL+ol1YUP2m4Ov+Lt4ODw==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-29T21:11:55.467419Z","bundle_sha256":"c75cf140f420d4555ea5f488cb0a3ba4b3328576dcd64a7ef423c66accf17258"}}