{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2017:7YX2275CYBKXP3OINVUDIYL6V6","short_pith_number":"pith:7YX2275C","canonical_record":{"source":{"id":"1711.03817","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2017-11-10T13:49:47Z","cross_cats_sorted":[],"title_canon_sha256":"e491565e6db34ac560967fe13ff668a1c71e9d083779455ff48e62994db54e0b","abstract_canon_sha256":"b19064edccccdc7f77d42a29eaa4e2a65ca681ee9bb95797309d2d6ee23be8f2"},"schema_version":"1.0"},"canonical_sha256":"fe2fad7fa2c05577edc86d6834617eaf995fc1e2d8d4e0eeab5da8a2ab5bbb34","source":{"kind":"arxiv","id":"1711.03817","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1711.03817","created_at":"2026-05-18T00:29:05Z"},{"alias_kind":"arxiv_version","alias_value":"1711.03817v2","created_at":"2026-05-18T00:29:05Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1711.03817","created_at":"2026-05-18T00:29:05Z"},{"alias_kind":"pith_short_12","alias_value":"7YX2275CYBKX","created_at":"2026-05-18T12:31:05Z"},{"alias_kind":"pith_short_16","alias_value":"7YX2275CYBKXP3OI","created_at":"2026-05-18T12:31:05Z"},{"alias_kind":"pith_short_8","alias_value":"7YX2275C","created_at":"2026-05-18T12:31:05Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2017:7YX2275CYBKXP3OINVUDIYL6V6","target":"record","payload":{"canonical_record":{"source":{"id":"1711.03817","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2017-11-10T13:49:47Z","cross_cats_sorted":[],"title_canon_sha256":"e491565e6db34ac560967fe13ff668a1c71e9d083779455ff48e62994db54e0b","abstract_canon_sha256":"b19064edccccdc7f77d42a29eaa4e2a65ca681ee9bb95797309d2d6ee23be8f2"},"schema_version":"1.0"},"canonical_sha256":"fe2fad7fa2c05577edc86d6834617eaf995fc1e2d8d4e0eeab5da8a2ab5bbb34","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:29:05.376009Z","signature_b64":"ez+e85STh+5gu3Jgxu47WbUeaqmBuygT77FuMmYTI+jc0ZcDkRGIOXv73PLEbtkAb+kX+3rR/hdScfcE8u8WAA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"fe2fad7fa2c05577edc86d6834617eaf995fc1e2d8d4e0eeab5da8a2ab5bbb34","last_reissued_at":"2026-05-18T00:29:05.375428Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:29:05.375428Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1711.03817","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:29:05Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"gnEBEbUwRRALDC1AAfi4z3ap6P33p0/Awx5++RG3HOVHFVraBofd0gABl1ftWXF/hWhMXEqsVNF6JH/5wuaLBg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-25T19:41:54.738875Z"},"content_sha256":"a116a0a5a3ed50e04b9fa227d5bf7c6274c971a66041610137ab1448f215f05a","schema_version":"1.0","event_id":"sha256:a116a0a5a3ed50e04b9fa227d5bf7c6274c971a66041610137ab1448f215f05a"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2017:7YX2275CYBKXP3OINVUDIYL6V6","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Learning with Options that Terminate Off-Policy","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.AI","authors_text":"Anna Harutyunyan, Ann Nowe, Doina Precup, Peter Vrancx, Pierre-Luc Bacon","submitted_at":"2017-11-10T13:49:47Z","abstract_excerpt":"A temporally abstract action, or an option, is specified by a policy and a termination condition: the policy guides option behavior, and the termination condition roughly determines its length. Generally, learning with longer options (like learning with multi-step returns) is known to be more efficient. However, if the option set for the task is not ideal, and cannot express the primitive optimal policy exactly, shorter options offer more flexibility and can yield a better solution. Thus, the termination condition puts learning efficiency at odds with solution quality. We propose to resolve th"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1711.03817","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:29:05Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"wvYyXPjyN+7PiptnFcHuLZA5w2e+E1GZciLvhz9es7N2X8PjGYGzjjvXV+UbDmyFC8IrPQ0W0EqIWhGpxu+nDA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-25T19:41:54.739309Z"},"content_sha256":"f91e9c1351ff61b4a6abfd414d82533bdcd7e511d9dc159dc1fe6f8967d97c61","schema_version":"1.0","event_id":"sha256:f91e9c1351ff61b4a6abfd414d82533bdcd7e511d9dc159dc1fe6f8967d97c61"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/7YX2275CYBKXP3OINVUDIYL6V6/bundle.json","state_url":"https://pith.science/pith/7YX2275CYBKXP3OINVUDIYL6V6/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/7YX2275CYBKXP3OINVUDIYL6V6/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-25T19:41:54Z","links":{"resolver":"https://pith.science/pith/7YX2275CYBKXP3OINVUDIYL6V6","bundle":"https://pith.science/pith/7YX2275CYBKXP3OINVUDIYL6V6/bundle.json","state":"https://pith.science/pith/7YX2275CYBKXP3OINVUDIYL6V6/state.json","well_known_bundle":"https://pith.science/.well-known/pith/7YX2275CYBKXP3OINVUDIYL6V6/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2017:7YX2275CYBKXP3OINVUDIYL6V6","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"b19064edccccdc7f77d42a29eaa4e2a65ca681ee9bb95797309d2d6ee23be8f2","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2017-11-10T13:49:47Z","title_canon_sha256":"e491565e6db34ac560967fe13ff668a1c71e9d083779455ff48e62994db54e0b"},"schema_version":"1.0","source":{"id":"1711.03817","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1711.03817","created_at":"2026-05-18T00:29:05Z"},{"alias_kind":"arxiv_version","alias_value":"1711.03817v2","created_at":"2026-05-18T00:29:05Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1711.03817","created_at":"2026-05-18T00:29:05Z"},{"alias_kind":"pith_short_12","alias_value":"7YX2275CYBKX","created_at":"2026-05-18T12:31:05Z"},{"alias_kind":"pith_short_16","alias_value":"7YX2275CYBKXP3OI","created_at":"2026-05-18T12:31:05Z"},{"alias_kind":"pith_short_8","alias_value":"7YX2275C","created_at":"2026-05-18T12:31:05Z"}],"graph_snapshots":[{"event_id":"sha256:f91e9c1351ff61b4a6abfd414d82533bdcd7e511d9dc159dc1fe6f8967d97c61","target":"graph","created_at":"2026-05-18T00:29:05Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"A temporally abstract action, or an option, is specified by a policy and a termination condition: the policy guides option behavior, and the termination condition roughly determines its length. Generally, learning with longer options (like learning with multi-step returns) is known to be more efficient. However, if the option set for the task is not ideal, and cannot express the primitive optimal policy exactly, shorter options offer more flexibility and can yield a better solution. Thus, the termination condition puts learning efficiency at odds with solution quality. We propose to resolve th","authors_text":"Anna Harutyunyan, Ann Nowe, Doina Precup, Peter Vrancx, Pierre-Luc Bacon","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2017-11-10T13:49:47Z","title":"Learning with Options that Terminate Off-Policy"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1711.03817","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:a116a0a5a3ed50e04b9fa227d5bf7c6274c971a66041610137ab1448f215f05a","target":"record","created_at":"2026-05-18T00:29:05Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"b19064edccccdc7f77d42a29eaa4e2a65ca681ee9bb95797309d2d6ee23be8f2","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2017-11-10T13:49:47Z","title_canon_sha256":"e491565e6db34ac560967fe13ff668a1c71e9d083779455ff48e62994db54e0b"},"schema_version":"1.0","source":{"id":"1711.03817","kind":"arxiv","version":2}},"canonical_sha256":"fe2fad7fa2c05577edc86d6834617eaf995fc1e2d8d4e0eeab5da8a2ab5bbb34","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"fe2fad7fa2c05577edc86d6834617eaf995fc1e2d8d4e0eeab5da8a2ab5bbb34","first_computed_at":"2026-05-18T00:29:05.375428Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:29:05.375428Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"ez+e85STh+5gu3Jgxu47WbUeaqmBuygT77FuMmYTI+jc0ZcDkRGIOXv73PLEbtkAb+kX+3rR/hdScfcE8u8WAA==","signature_status":"signed_v1","signed_at":"2026-05-18T00:29:05.376009Z","signed_message":"canonical_sha256_bytes"},"source_id":"1711.03817","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:a116a0a5a3ed50e04b9fa227d5bf7c6274c971a66041610137ab1448f215f05a","sha256:f91e9c1351ff61b4a6abfd414d82533bdcd7e511d9dc159dc1fe6f8967d97c61"],"state_sha256":"a2ef90ef3e7daa39c956d5f8c0c8f4a129802ad8c294cdb03d4ee2aa402eb46a"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"CmYjtIR6lew6S5nFwTGJO7jmmINxWaDPMPFC0VY2RVNfjTsZN6xHdCmS6tjCq99mgGSrKMeWxUR8SvbMIdneBA==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-25T19:41:54.742627Z","bundle_sha256":"d3d2bc5caf7ee56fa5d0a2e53225f336e0a1984fc18565db99e17a63f79e5bec"}}