{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2018:LT2FCGFPI3PMQHXELKYIHP7PFB","short_pith_number":"pith:LT2FCGFP","schema_version":"1.0","canonical_sha256":"5cf45118af46dec81ee45ab083bfef28759c032e68e0ea76574fd5f8fb7171a5","source":{"kind":"arxiv","id":"1810.10096","version":3},"attestation_state":"computed","paper":{"title":"Learning Representations in Model-Free Hierarchical Reinforcement Learning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.LG","math.OC"],"primary_cat":"cs.AI","authors_text":"David C. Noelle, Jacob Rafati","submitted_at":"2018-10-23T21:24:06Z","abstract_excerpt":"Common approaches to Reinforcement Learning (RL) are seriously challenged by large-scale applications involving huge state spaces and sparse delayed reward feedback. Hierarchical Reinforcement Learning (HRL) methods attempt to address this scalability issue by learning action selection policies at multiple levels of temporal abstraction. Abstraction can be had by identifying a relatively small set of states that are likely to be useful as subgoals, in concert with the learning of corresponding skill policies to achieve those subgoals. Many approaches to subgoal discovery in HRL depend on the a"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1810.10096","kind":"arxiv","version":3},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2018-10-23T21:24:06Z","cross_cats_sorted":["cs.LG","math.OC"],"title_canon_sha256":"4b0197cc8e2f46733e3a18e0bf21fa28b8d3a07a27ee0d13b1820bb45eedbe05","abstract_canon_sha256":"3abeafdcd414306c0f4ea28975226509a5e7a7b494bf31477fbc7b08fdb7f963"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:48:45.497324Z","signature_b64":"ohKacZ4UEh2Ba4eqAHVAD+W8C6r4SGs2BIhxP8xqPxaGiOSian4c8F7BlxpVGvmeF1hjC/K17Ku2K2yCwQJNAg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"5cf45118af46dec81ee45ab083bfef28759c032e68e0ea76574fd5f8fb7171a5","last_reissued_at":"2026-05-17T23:48:45.496845Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:48:45.496845Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Learning Representations in Model-Free Hierarchical Reinforcement Learning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.LG","math.OC"],"primary_cat":"cs.AI","authors_text":"David C. Noelle, Jacob Rafati","submitted_at":"2018-10-23T21:24:06Z","abstract_excerpt":"Common approaches to Reinforcement Learning (RL) are seriously challenged by large-scale applications involving huge state spaces and sparse delayed reward feedback. Hierarchical Reinforcement Learning (HRL) methods attempt to address this scalability issue by learning action selection policies at multiple levels of temporal abstraction. Abstraction can be had by identifying a relatively small set of states that are likely to be useful as subgoals, in concert with the learning of corresponding skill policies to achieve those subgoals. Many approaches to subgoal discovery in HRL depend on the a"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1810.10096","kind":"arxiv","version":3},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1810.10096","created_at":"2026-05-17T23:48:45.496916+00:00"},{"alias_kind":"arxiv_version","alias_value":"1810.10096v3","created_at":"2026-05-17T23:48:45.496916+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1810.10096","created_at":"2026-05-17T23:48:45.496916+00:00"},{"alias_kind":"pith_short_12","alias_value":"LT2FCGFPI3PM","created_at":"2026-05-18T12:32:37.024351+00:00"},{"alias_kind":"pith_short_16","alias_value":"LT2FCGFPI3PMQHXE","created_at":"2026-05-18T12:32:37.024351+00:00"},{"alias_kind":"pith_short_8","alias_value":"LT2FCGFP","created_at":"2026-05-18T12:32:37.024351+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/LT2FCGFPI3PMQHXELKYIHP7PFB","json":"https://pith.science/pith/LT2FCGFPI3PMQHXELKYIHP7PFB.json","graph_json":"https://pith.science/api/pith-number/LT2FCGFPI3PMQHXELKYIHP7PFB/graph.json","events_json":"https://pith.science/api/pith-number/LT2FCGFPI3PMQHXELKYIHP7PFB/events.json","paper":"https://pith.science/paper/LT2FCGFP"},"agent_actions":{"view_html":"https://pith.science/pith/LT2FCGFPI3PMQHXELKYIHP7PFB","download_json":"https://pith.science/pith/LT2FCGFPI3PMQHXELKYIHP7PFB.json","view_paper":"https://pith.science/paper/LT2FCGFP","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1810.10096&json=true","fetch_graph":"https://pith.science/api/pith-number/LT2FCGFPI3PMQHXELKYIHP7PFB/graph.json","fetch_events":"https://pith.science/api/pith-number/LT2FCGFPI3PMQHXELKYIHP7PFB/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/LT2FCGFPI3PMQHXELKYIHP7PFB/action/timestamp_anchor","attest_storage":"https://pith.science/pith/LT2FCGFPI3PMQHXELKYIHP7PFB/action/storage_attestation","attest_author":"https://pith.science/pith/LT2FCGFPI3PMQHXELKYIHP7PFB/action/author_attestation","sign_citation":"https://pith.science/pith/LT2FCGFPI3PMQHXELKYIHP7PFB/action/citation_signature","submit_replication":"https://pith.science/pith/LT2FCGFPI3PMQHXELKYIHP7PFB/action/replication_record"}},"created_at":"2026-05-17T23:48:45.496916+00:00","updated_at":"2026-05-17T23:48:45.496916+00:00"}