{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2018:NLZ65XPGWOOAQWUT77HMRQHQCW","short_pith_number":"pith:NLZ65XPG","canonical_record":{"source":{"id":"1810.01266","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-09-29T18:40:13Z","cross_cats_sorted":["cs.AI","stat.ML"],"title_canon_sha256":"7ae811ec6f974eab2757568f3a0570346b961b701bd79556ff21cf9fb502e861","abstract_canon_sha256":"6cfd301d53e50e250287138a54d0a2fe43258fbedcef70a32a512e80eb05f43b"},"schema_version":"1.0"},"canonical_sha256":"6af3eedde6b39c085a93ffcec8c0f015a4b9aac5345bd74743f0fea6f75d9d70","source":{"kind":"arxiv","id":"1810.01266","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1810.01266","created_at":"2026-05-17T23:51:33Z"},{"alias_kind":"arxiv_version","alias_value":"1810.01266v2","created_at":"2026-05-17T23:51:33Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1810.01266","created_at":"2026-05-17T23:51:33Z"},{"alias_kind":"pith_short_12","alias_value":"NLZ65XPGWOOA","created_at":"2026-05-18T12:32:40Z"},{"alias_kind":"pith_short_16","alias_value":"NLZ65XPGWOOAQWUT","created_at":"2026-05-18T12:32:40Z"},{"alias_kind":"pith_short_8","alias_value":"NLZ65XPG","created_at":"2026-05-18T12:32:40Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2018:NLZ65XPGWOOAQWUT77HMRQHQCW","target":"record","payload":{"canonical_record":{"source":{"id":"1810.01266","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-09-29T18:40:13Z","cross_cats_sorted":["cs.AI","stat.ML"],"title_canon_sha256":"7ae811ec6f974eab2757568f3a0570346b961b701bd79556ff21cf9fb502e861","abstract_canon_sha256":"6cfd301d53e50e250287138a54d0a2fe43258fbedcef70a32a512e80eb05f43b"},"schema_version":"1.0"},"canonical_sha256":"6af3eedde6b39c085a93ffcec8c0f015a4b9aac5345bd74743f0fea6f75d9d70","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:51:33.175834Z","signature_b64":"sSEGvrS43Un9TKx7I6SOUCi9EQdq2Nl3PGUabWfuWAdEhbLPAbFnDuoB+oFQg1bBGkaKz+1vXVbATo8qq4amCw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"6af3eedde6b39c085a93ffcec8c0f015a4b9aac5345bd74743f0fea6f75d9d70","last_reissued_at":"2026-05-17T23:51:33.175263Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:51:33.175263Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1810.01266","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:51:33Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"bgiQ+J7oHogGEePqJdeSl3OlSrKTYHrw+FHIuWCn+M5jdxsJ44TcaLP5vS+aCNi+GIaj+Te3z+c/JwqRDv6aDg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-24T05:46:11.680694Z"},"content_sha256":"bad28cc90fdeb990fc0ca934bdfad04c55aa309e5bfbfbc2346dc8f6294f01ec","schema_version":"1.0","event_id":"sha256:bad28cc90fdeb990fc0ca934bdfad04c55aa309e5bfbfbc2346dc8f6294f01ec"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2018:NLZ65XPGWOOAQWUT77HMRQHQCW","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Directed-Info GAIL: Learning Hierarchical Policies from Unsegmented Demonstrations using Directed Information","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","stat.ML"],"primary_cat":"cs.LG","authors_text":"Arjun Sharma, Kris M. Kitani, Mohit Sharma, Nicholas Rhinehart","submitted_at":"2018-09-29T18:40:13Z","abstract_excerpt":"The use of imitation learning to learn a single policy for a complex task that has multiple modes or hierarchical structure can be challenging. In fact, previous work has shown that when the modes are known, learning separate policies for each mode or sub-task can greatly improve the performance of imitation learning. In this work, we discover the interaction between sub-tasks from their resulting state-action trajectory sequences using a directed graphical model. We propose a new algorithm based on the generative adversarial imitation learning framework which automatically learns sub-task pol"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1810.01266","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:51:33Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"XYCxUyT6Z5fxl8dIcn73r4UVO+W+JCY/XF/lgMS3LxxEXNT/pQFERbyWVQfUBb3bAf+9bNW+1294I7lcVq6eBg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-24T05:46:11.681043Z"},"content_sha256":"604f6b722ae5f758ad23c65aee7581438cf64aa8ab16c44ba85043adcbf9cde0","schema_version":"1.0","event_id":"sha256:604f6b722ae5f758ad23c65aee7581438cf64aa8ab16c44ba85043adcbf9cde0"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/NLZ65XPGWOOAQWUT77HMRQHQCW/bundle.json","state_url":"https://pith.science/pith/NLZ65XPGWOOAQWUT77HMRQHQCW/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/NLZ65XPGWOOAQWUT77HMRQHQCW/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-24T05:46:11Z","links":{"resolver":"https://pith.science/pith/NLZ65XPGWOOAQWUT77HMRQHQCW","bundle":"https://pith.science/pith/NLZ65XPGWOOAQWUT77HMRQHQCW/bundle.json","state":"https://pith.science/pith/NLZ65XPGWOOAQWUT77HMRQHQCW/state.json","well_known_bundle":"https://pith.science/.well-known/pith/NLZ65XPGWOOAQWUT77HMRQHQCW/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2018:NLZ65XPGWOOAQWUT77HMRQHQCW","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"6cfd301d53e50e250287138a54d0a2fe43258fbedcef70a32a512e80eb05f43b","cross_cats_sorted":["cs.AI","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-09-29T18:40:13Z","title_canon_sha256":"7ae811ec6f974eab2757568f3a0570346b961b701bd79556ff21cf9fb502e861"},"schema_version":"1.0","source":{"id":"1810.01266","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1810.01266","created_at":"2026-05-17T23:51:33Z"},{"alias_kind":"arxiv_version","alias_value":"1810.01266v2","created_at":"2026-05-17T23:51:33Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1810.01266","created_at":"2026-05-17T23:51:33Z"},{"alias_kind":"pith_short_12","alias_value":"NLZ65XPGWOOA","created_at":"2026-05-18T12:32:40Z"},{"alias_kind":"pith_short_16","alias_value":"NLZ65XPGWOOAQWUT","created_at":"2026-05-18T12:32:40Z"},{"alias_kind":"pith_short_8","alias_value":"NLZ65XPG","created_at":"2026-05-18T12:32:40Z"}],"graph_snapshots":[{"event_id":"sha256:604f6b722ae5f758ad23c65aee7581438cf64aa8ab16c44ba85043adcbf9cde0","target":"graph","created_at":"2026-05-17T23:51:33Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"The use of imitation learning to learn a single policy for a complex task that has multiple modes or hierarchical structure can be challenging. In fact, previous work has shown that when the modes are known, learning separate policies for each mode or sub-task can greatly improve the performance of imitation learning. In this work, we discover the interaction between sub-tasks from their resulting state-action trajectory sequences using a directed graphical model. We propose a new algorithm based on the generative adversarial imitation learning framework which automatically learns sub-task pol","authors_text":"Arjun Sharma, Kris M. Kitani, Mohit Sharma, Nicholas Rhinehart","cross_cats":["cs.AI","stat.ML"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-09-29T18:40:13Z","title":"Directed-Info GAIL: Learning Hierarchical Policies from Unsegmented Demonstrations using Directed Information"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1810.01266","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:bad28cc90fdeb990fc0ca934bdfad04c55aa309e5bfbfbc2346dc8f6294f01ec","target":"record","created_at":"2026-05-17T23:51:33Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"6cfd301d53e50e250287138a54d0a2fe43258fbedcef70a32a512e80eb05f43b","cross_cats_sorted":["cs.AI","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-09-29T18:40:13Z","title_canon_sha256":"7ae811ec6f974eab2757568f3a0570346b961b701bd79556ff21cf9fb502e861"},"schema_version":"1.0","source":{"id":"1810.01266","kind":"arxiv","version":2}},"canonical_sha256":"6af3eedde6b39c085a93ffcec8c0f015a4b9aac5345bd74743f0fea6f75d9d70","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"6af3eedde6b39c085a93ffcec8c0f015a4b9aac5345bd74743f0fea6f75d9d70","first_computed_at":"2026-05-17T23:51:33.175263Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:51:33.175263Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"sSEGvrS43Un9TKx7I6SOUCi9EQdq2Nl3PGUabWfuWAdEhbLPAbFnDuoB+oFQg1bBGkaKz+1vXVbATo8qq4amCw==","signature_status":"signed_v1","signed_at":"2026-05-17T23:51:33.175834Z","signed_message":"canonical_sha256_bytes"},"source_id":"1810.01266","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:bad28cc90fdeb990fc0ca934bdfad04c55aa309e5bfbfbc2346dc8f6294f01ec","sha256:604f6b722ae5f758ad23c65aee7581438cf64aa8ab16c44ba85043adcbf9cde0"],"state_sha256":"498ee346ba50c3b9460352df17f308679c3d9408d1d5e481c995ec1f9f913d42"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"D6T5TZO+vMSzcC9NcCpjKTViuVj3Tj7TLEAe0yyu3eDgIamdnnC1KWgcW1dqfd+TWOLGFUv5w/WQEjrib5WwBA==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-24T05:46:11.683010Z","bundle_sha256":"2f7a105600c4a443fb9fc018ad8d959fd6387702894a92d89a4a44dd8b53aa34"}}