{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2018:RZ5PKM5SPDG3GSDBTJMRWZ2AM6","short_pith_number":"pith:RZ5PKM5S","canonical_record":{"source":{"id":"1806.10792","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-06-28T06:56:19Z","cross_cats_sorted":["cs.AI","stat.ML"],"title_canon_sha256":"f6e1c141fc5a5d956664ec2f70184d12930e708ed3e373c1bb1fcf43af0d0307","abstract_canon_sha256":"c8bacd9637d9705bc81b1d923e937d7e5ebae0aac590c8f8a176ad1ff14444a3"},"schema_version":"1.0"},"canonical_sha256":"8e7af533b278cdb348619a591b674067adb256dfb93e125e96a9d0765f2edd1c","source":{"kind":"arxiv","id":"1806.10792","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1806.10792","created_at":"2026-05-18T00:12:07Z"},{"alias_kind":"arxiv_version","alias_value":"1806.10792v1","created_at":"2026-05-18T00:12:07Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1806.10792","created_at":"2026-05-18T00:12:07Z"},{"alias_kind":"pith_short_12","alias_value":"RZ5PKM5SPDG3","created_at":"2026-05-18T12:32:50Z"},{"alias_kind":"pith_short_16","alias_value":"RZ5PKM5SPDG3GSDB","created_at":"2026-05-18T12:32:50Z"},{"alias_kind":"pith_short_8","alias_value":"RZ5PKM5S","created_at":"2026-05-18T12:32:50Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2018:RZ5PKM5SPDG3GSDBTJMRWZ2AM6","target":"record","payload":{"canonical_record":{"source":{"id":"1806.10792","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-06-28T06:56:19Z","cross_cats_sorted":["cs.AI","stat.ML"],"title_canon_sha256":"f6e1c141fc5a5d956664ec2f70184d12930e708ed3e373c1bb1fcf43af0d0307","abstract_canon_sha256":"c8bacd9637d9705bc81b1d923e937d7e5ebae0aac590c8f8a176ad1ff14444a3"},"schema_version":"1.0"},"canonical_sha256":"8e7af533b278cdb348619a591b674067adb256dfb93e125e96a9d0765f2edd1c","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:12:07.929563Z","signature_b64":"uA9xD4TBLpq0xgrNUb+yrh/QWBjDDM4BvKVcjvtpd++L1rYpdHhUvr5EnRKA8PdMj+/StFQOEOgwF6fHXdGjBg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"8e7af533b278cdb348619a591b674067adb256dfb93e125e96a9d0765f2edd1c","last_reissued_at":"2026-05-18T00:12:07.928911Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:12:07.928911Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1806.10792","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:12:07Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"N3yCwulbPANXuJFpj0dqZOHef2ryKIqgEFSbPB6u1olt992vauO6+m8tDNN7b6v9mcDiYNkNZqpjkQa+br4/Bw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-25T14:17:05.432450Z"},"content_sha256":"8a004702112021d25e887624895fed90d9591010eae12d94915176f680b65df6","schema_version":"1.0","event_id":"sha256:8a004702112021d25e887624895fed90d9591010eae12d94915176f680b65df6"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2018:RZ5PKM5SPDG3GSDBTJMRWZ2AM6","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Hierarchical Reinforcement Learning with Abductive Planning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","stat.ML"],"primary_cat":"cs.LG","authors_text":"Kazeto Yamamoto, Takashi Onishi, Yoshimasa Tsuruoka","submitted_at":"2018-06-28T06:56:19Z","abstract_excerpt":"One of the key challenges in applying reinforcement learning to real-life problems is that the amount of train-and-error required to learn a good policy increases drastically as the task becomes complex. One potential solution to this problem is to combine reinforcement learning with automated symbol planning and utilize prior knowledge on the domain. However, existing methods have limitations in their applicability and expressiveness. In this paper we propose a hierarchical reinforcement learning method based on abductive symbolic planning. The planner can deal with user-defined evaluation fu"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1806.10792","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:12:07Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"FwmFVqA1YW5QT5JewN3EKyMlEBcWFoSODzURdoT7a8cCkl79/zT0w5xM7QxqewrrsbZZhqct//Dzfw/cjoY1Bw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-25T14:17:05.432801Z"},"content_sha256":"0fba77b4be813d80355e3f491ee34292e0191ea31921a05a35537b4df143ff45","schema_version":"1.0","event_id":"sha256:0fba77b4be813d80355e3f491ee34292e0191ea31921a05a35537b4df143ff45"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/RZ5PKM5SPDG3GSDBTJMRWZ2AM6/bundle.json","state_url":"https://pith.science/pith/RZ5PKM5SPDG3GSDBTJMRWZ2AM6/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/RZ5PKM5SPDG3GSDBTJMRWZ2AM6/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-25T14:17:05Z","links":{"resolver":"https://pith.science/pith/RZ5PKM5SPDG3GSDBTJMRWZ2AM6","bundle":"https://pith.science/pith/RZ5PKM5SPDG3GSDBTJMRWZ2AM6/bundle.json","state":"https://pith.science/pith/RZ5PKM5SPDG3GSDBTJMRWZ2AM6/state.json","well_known_bundle":"https://pith.science/.well-known/pith/RZ5PKM5SPDG3GSDBTJMRWZ2AM6/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2018:RZ5PKM5SPDG3GSDBTJMRWZ2AM6","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"c8bacd9637d9705bc81b1d923e937d7e5ebae0aac590c8f8a176ad1ff14444a3","cross_cats_sorted":["cs.AI","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-06-28T06:56:19Z","title_canon_sha256":"f6e1c141fc5a5d956664ec2f70184d12930e708ed3e373c1bb1fcf43af0d0307"},"schema_version":"1.0","source":{"id":"1806.10792","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1806.10792","created_at":"2026-05-18T00:12:07Z"},{"alias_kind":"arxiv_version","alias_value":"1806.10792v1","created_at":"2026-05-18T00:12:07Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1806.10792","created_at":"2026-05-18T00:12:07Z"},{"alias_kind":"pith_short_12","alias_value":"RZ5PKM5SPDG3","created_at":"2026-05-18T12:32:50Z"},{"alias_kind":"pith_short_16","alias_value":"RZ5PKM5SPDG3GSDB","created_at":"2026-05-18T12:32:50Z"},{"alias_kind":"pith_short_8","alias_value":"RZ5PKM5S","created_at":"2026-05-18T12:32:50Z"}],"graph_snapshots":[{"event_id":"sha256:0fba77b4be813d80355e3f491ee34292e0191ea31921a05a35537b4df143ff45","target":"graph","created_at":"2026-05-18T00:12:07Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"One of the key challenges in applying reinforcement learning to real-life problems is that the amount of train-and-error required to learn a good policy increases drastically as the task becomes complex. One potential solution to this problem is to combine reinforcement learning with automated symbol planning and utilize prior knowledge on the domain. However, existing methods have limitations in their applicability and expressiveness. In this paper we propose a hierarchical reinforcement learning method based on abductive symbolic planning. The planner can deal with user-defined evaluation fu","authors_text":"Kazeto Yamamoto, Takashi Onishi, Yoshimasa Tsuruoka","cross_cats":["cs.AI","stat.ML"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-06-28T06:56:19Z","title":"Hierarchical Reinforcement Learning with Abductive Planning"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1806.10792","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:8a004702112021d25e887624895fed90d9591010eae12d94915176f680b65df6","target":"record","created_at":"2026-05-18T00:12:07Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"c8bacd9637d9705bc81b1d923e937d7e5ebae0aac590c8f8a176ad1ff14444a3","cross_cats_sorted":["cs.AI","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-06-28T06:56:19Z","title_canon_sha256":"f6e1c141fc5a5d956664ec2f70184d12930e708ed3e373c1bb1fcf43af0d0307"},"schema_version":"1.0","source":{"id":"1806.10792","kind":"arxiv","version":1}},"canonical_sha256":"8e7af533b278cdb348619a591b674067adb256dfb93e125e96a9d0765f2edd1c","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"8e7af533b278cdb348619a591b674067adb256dfb93e125e96a9d0765f2edd1c","first_computed_at":"2026-05-18T00:12:07.928911Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:12:07.928911Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"uA9xD4TBLpq0xgrNUb+yrh/QWBjDDM4BvKVcjvtpd++L1rYpdHhUvr5EnRKA8PdMj+/StFQOEOgwF6fHXdGjBg==","signature_status":"signed_v1","signed_at":"2026-05-18T00:12:07.929563Z","signed_message":"canonical_sha256_bytes"},"source_id":"1806.10792","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:8a004702112021d25e887624895fed90d9591010eae12d94915176f680b65df6","sha256:0fba77b4be813d80355e3f491ee34292e0191ea31921a05a35537b4df143ff45"],"state_sha256":"11d327c8c5e937ec8e037a52039f875e58738f977e892c066c0c05b3809c58ef"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"QdoBOMJFtltZzLo8VjhIaUksUDHDO3jX+8IkvCuC4vi5xMzhjb6D2Ucpgux4Qjiip2NzdXL9Em5wJXbSnMthCQ==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-25T14:17:05.434898Z","bundle_sha256":"9469791bb4fb2ba264c9833a94eb22b25283734e06e7c847effa8e2457f5c244"}}