{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2017:SZSBFIRCE3U73XHOQ2DZWB3FPS","short_pith_number":"pith:SZSBFIRC","canonical_record":{"source":{"id":"1705.10993","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"stat.ML","submitted_at":"2017-05-31T09:00:44Z","cross_cats_sorted":["cs.AI","cs.LG","cs.NE"],"title_canon_sha256":"cf7319b1b35fb608a5514c24b2203e1f6a014b3fbb5f60ef3ebd0a42e2d3b060","abstract_canon_sha256":"4a1fd62071022c772872b8952e437437d8756d867de68117ce5dddeca4157293"},"schema_version":"1.0"},"canonical_sha256":"966412a22226e9fddcee86879b07657cbd7f3e470565664c91ef53ea04317a6b","source":{"kind":"arxiv","id":"1705.10993","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1705.10993","created_at":"2026-05-18T00:43:19Z"},{"alias_kind":"arxiv_version","alias_value":"1705.10993v1","created_at":"2026-05-18T00:43:19Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1705.10993","created_at":"2026-05-18T00:43:19Z"},{"alias_kind":"pith_short_12","alias_value":"SZSBFIRCE3U7","created_at":"2026-05-18T12:31:43Z"},{"alias_kind":"pith_short_16","alias_value":"SZSBFIRCE3U73XHO","created_at":"2026-05-18T12:31:43Z"},{"alias_kind":"pith_short_8","alias_value":"SZSBFIRC","created_at":"2026-05-18T12:31:43Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2017:SZSBFIRCE3U73XHOQ2DZWB3FPS","target":"record","payload":{"canonical_record":{"source":{"id":"1705.10993","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"stat.ML","submitted_at":"2017-05-31T09:00:44Z","cross_cats_sorted":["cs.AI","cs.LG","cs.NE"],"title_canon_sha256":"cf7319b1b35fb608a5514c24b2203e1f6a014b3fbb5f60ef3ebd0a42e2d3b060","abstract_canon_sha256":"4a1fd62071022c772872b8952e437437d8756d867de68117ce5dddeca4157293"},"schema_version":"1.0"},"canonical_sha256":"966412a22226e9fddcee86879b07657cbd7f3e470565664c91ef53ea04317a6b","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:43:19.584547Z","signature_b64":"8RHJjp0HDNCyDHFzA7WMf2gRzzM5DDb/TPJ30XKABhgQ/ItoQiaL60LXhPljLj62mGq1sVnZdDRiVbITVjp+Ag==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"966412a22226e9fddcee86879b07657cbd7f3e470565664c91ef53ea04317a6b","last_reissued_at":"2026-05-18T00:43:19.583920Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:43:19.583920Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1705.10993","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:43:19Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"8nUulNLQ44R3jyAhHo33XEWd3Sh9KneHyrqkEN9ZVZWGqR+Q7s6JsMrTgG0yti57RjfbO4dwuWMkfh0UpWIfAg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-26T07:04:03.933449Z"},"content_sha256":"79eaffbe46ac07e7b811fe1d0336ed28dbc8c92a1b5fdf19891e7adb72f04264","schema_version":"1.0","event_id":"sha256:79eaffbe46ac07e7b811fe1d0336ed28dbc8c92a1b5fdf19891e7adb72f04264"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2017:SZSBFIRCE3U73XHOQ2DZWB3FPS","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Non-Markovian Control with Gated End-to-End Memory Policy Networks","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","cs.LG","cs.NE"],"primary_cat":"stat.ML","authors_text":"Julien Perez, Tomi Silander","submitted_at":"2017-05-31T09:00:44Z","abstract_excerpt":"Partially observable environments present an important open challenge in the domain of sequential control learning with delayed rewards. Despite numerous attempts during the two last decades, the majority of reinforcement learning algorithms and associated approximate models, applied to this context, still assume Markovian state transitions. In this paper, we explore the use of a recently proposed attention-based model, the Gated End-to-End Memory Network, for sequential control. We call the resulting model the Gated End-to-End Memory Policy Network. More precisely, we use a model-free value-b"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1705.10993","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:43:19Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"EEtPY8pyoMZiahm4jNmsrs2a3ztL8jb+usuTZj8Ksxrc6tRx1dkUkMzjGvPx8PC2EG0AGzsnC/x0rWYP6gDLDQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-26T07:04:03.934144Z"},"content_sha256":"a7354c4611bd0e45d237ce0a8ed2554d2b9d56d06b5c332d493d6ec18adcfe75","schema_version":"1.0","event_id":"sha256:a7354c4611bd0e45d237ce0a8ed2554d2b9d56d06b5c332d493d6ec18adcfe75"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/SZSBFIRCE3U73XHOQ2DZWB3FPS/bundle.json","state_url":"https://pith.science/pith/SZSBFIRCE3U73XHOQ2DZWB3FPS/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/SZSBFIRCE3U73XHOQ2DZWB3FPS/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-26T07:04:03Z","links":{"resolver":"https://pith.science/pith/SZSBFIRCE3U73XHOQ2DZWB3FPS","bundle":"https://pith.science/pith/SZSBFIRCE3U73XHOQ2DZWB3FPS/bundle.json","state":"https://pith.science/pith/SZSBFIRCE3U73XHOQ2DZWB3FPS/state.json","well_known_bundle":"https://pith.science/.well-known/pith/SZSBFIRCE3U73XHOQ2DZWB3FPS/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2017:SZSBFIRCE3U73XHOQ2DZWB3FPS","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"4a1fd62071022c772872b8952e437437d8756d867de68117ce5dddeca4157293","cross_cats_sorted":["cs.AI","cs.LG","cs.NE"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"stat.ML","submitted_at":"2017-05-31T09:00:44Z","title_canon_sha256":"cf7319b1b35fb608a5514c24b2203e1f6a014b3fbb5f60ef3ebd0a42e2d3b060"},"schema_version":"1.0","source":{"id":"1705.10993","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1705.10993","created_at":"2026-05-18T00:43:19Z"},{"alias_kind":"arxiv_version","alias_value":"1705.10993v1","created_at":"2026-05-18T00:43:19Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1705.10993","created_at":"2026-05-18T00:43:19Z"},{"alias_kind":"pith_short_12","alias_value":"SZSBFIRCE3U7","created_at":"2026-05-18T12:31:43Z"},{"alias_kind":"pith_short_16","alias_value":"SZSBFIRCE3U73XHO","created_at":"2026-05-18T12:31:43Z"},{"alias_kind":"pith_short_8","alias_value":"SZSBFIRC","created_at":"2026-05-18T12:31:43Z"}],"graph_snapshots":[{"event_id":"sha256:a7354c4611bd0e45d237ce0a8ed2554d2b9d56d06b5c332d493d6ec18adcfe75","target":"graph","created_at":"2026-05-18T00:43:19Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Partially observable environments present an important open challenge in the domain of sequential control learning with delayed rewards. Despite numerous attempts during the two last decades, the majority of reinforcement learning algorithms and associated approximate models, applied to this context, still assume Markovian state transitions. In this paper, we explore the use of a recently proposed attention-based model, the Gated End-to-End Memory Network, for sequential control. We call the resulting model the Gated End-to-End Memory Policy Network. More precisely, we use a model-free value-b","authors_text":"Julien Perez, Tomi Silander","cross_cats":["cs.AI","cs.LG","cs.NE"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"stat.ML","submitted_at":"2017-05-31T09:00:44Z","title":"Non-Markovian Control with Gated End-to-End Memory Policy Networks"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1705.10993","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:79eaffbe46ac07e7b811fe1d0336ed28dbc8c92a1b5fdf19891e7adb72f04264","target":"record","created_at":"2026-05-18T00:43:19Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"4a1fd62071022c772872b8952e437437d8756d867de68117ce5dddeca4157293","cross_cats_sorted":["cs.AI","cs.LG","cs.NE"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"stat.ML","submitted_at":"2017-05-31T09:00:44Z","title_canon_sha256":"cf7319b1b35fb608a5514c24b2203e1f6a014b3fbb5f60ef3ebd0a42e2d3b060"},"schema_version":"1.0","source":{"id":"1705.10993","kind":"arxiv","version":1}},"canonical_sha256":"966412a22226e9fddcee86879b07657cbd7f3e470565664c91ef53ea04317a6b","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"966412a22226e9fddcee86879b07657cbd7f3e470565664c91ef53ea04317a6b","first_computed_at":"2026-05-18T00:43:19.583920Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:43:19.583920Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"8RHJjp0HDNCyDHFzA7WMf2gRzzM5DDb/TPJ30XKABhgQ/ItoQiaL60LXhPljLj62mGq1sVnZdDRiVbITVjp+Ag==","signature_status":"signed_v1","signed_at":"2026-05-18T00:43:19.584547Z","signed_message":"canonical_sha256_bytes"},"source_id":"1705.10993","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:79eaffbe46ac07e7b811fe1d0336ed28dbc8c92a1b5fdf19891e7adb72f04264","sha256:a7354c4611bd0e45d237ce0a8ed2554d2b9d56d06b5c332d493d6ec18adcfe75"],"state_sha256":"5bf8fdfc0cb921b274d8fb8c13ad4b5ffb937b68c6c5b900c49258c8b76b7e27"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"WyFtNcoRV3hZzSBUS7S8DiBtx4xW8Da3XliQnEglVxypeTVdSY/kh24v0I24Gz/I4kKBf+ZvlKRmARldy0u/CA==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-26T07:04:03.938151Z","bundle_sha256":"688d6b0c02ad00239f8a692ab920113644d4da0a8eff7b621b519330de2d267d"}}