{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2018:Z3GJD6Y7J4QQRZTNWK7K5HYMUC","short_pith_number":"pith:Z3GJD6Y7","canonical_record":{"source":{"id":"1808.10692","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-08-31T11:47:10Z","cross_cats_sorted":["stat.ML"],"title_canon_sha256":"08e8cd7a8496a6189ed3b6e4812d056f9dc7ca959b5b3a50c6f78c604d67fb33","abstract_canon_sha256":"2548f62e5e7c5c8d49f506d13600a5b18b3e691cf25c29137998d2a00d76af89"},"schema_version":"1.0"},"canonical_sha256":"cecc91fb1f4f2108e66db2beae9f0ca0afda8e7d66378f57a4eee39bdcc69cad","source":{"kind":"arxiv","id":"1808.10692","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1808.10692","created_at":"2026-05-18T00:06:45Z"},{"alias_kind":"arxiv_version","alias_value":"1808.10692v1","created_at":"2026-05-18T00:06:45Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1808.10692","created_at":"2026-05-18T00:06:45Z"},{"alias_kind":"pith_short_12","alias_value":"Z3GJD6Y7J4QQ","created_at":"2026-05-18T12:33:04Z"},{"alias_kind":"pith_short_16","alias_value":"Z3GJD6Y7J4QQRZTN","created_at":"2026-05-18T12:33:04Z"},{"alias_kind":"pith_short_8","alias_value":"Z3GJD6Y7","created_at":"2026-05-18T12:33:04Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2018:Z3GJD6Y7J4QQRZTNWK7K5HYMUC","target":"record","payload":{"canonical_record":{"source":{"id":"1808.10692","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-08-31T11:47:10Z","cross_cats_sorted":["stat.ML"],"title_canon_sha256":"08e8cd7a8496a6189ed3b6e4812d056f9dc7ca959b5b3a50c6f78c604d67fb33","abstract_canon_sha256":"2548f62e5e7c5c8d49f506d13600a5b18b3e691cf25c29137998d2a00d76af89"},"schema_version":"1.0"},"canonical_sha256":"cecc91fb1f4f2108e66db2beae9f0ca0afda8e7d66378f57a4eee39bdcc69cad","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:06:45.072047Z","signature_b64":"AlSVsBYCxyEadblv24EzNoCcqD5lUQ5/nucE8lCbh4ndUJafGvlnjJAWfOk0aPc7xci2/NMqo2mk5SAA2k4ACQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"cecc91fb1f4f2108e66db2beae9f0ca0afda8e7d66378f57a4eee39bdcc69cad","last_reissued_at":"2026-05-18T00:06:45.071487Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:06:45.071487Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1808.10692","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:06:45Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"T6hDRFiIkyQBAdIBK15kIgy9qlP1D0VjkCBNbn4hTUlfBrLMLqkKHd0MLHOWJrmdSNhC51Eq27wcvX5jWbWhBg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-08T17:39:02.847057Z"},"content_sha256":"d41040b6999cfa324c29f0fd9c9d6730e07e1fcb789cf4b2c6ce247c6aa7eaa8","schema_version":"1.0","event_id":"sha256:d41040b6999cfa324c29f0fd9c9d6730e07e1fcb789cf4b2c6ce247c6aa7eaa8"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2018:Z3GJD6Y7J4QQRZTNWK7K5HYMUC","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"APES: a Python toolbox for simulating reinforcement learning environments","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["stat.ML"],"primary_cat":"cs.LG","authors_text":"Aqeel Labash, Ardi Tampuu, Jaan Aru, Raul Vicente, Tambet Matiisen","submitted_at":"2018-08-31T11:47:10Z","abstract_excerpt":"Assisted by neural networks, reinforcement learning agents have been able to solve increasingly complex tasks over the last years. The simulation environment in which the agents interact is an essential component in any reinforcement learning problem. The environment simulates the dynamics of the agents' world and hence provides feedback to their actions in terms of state observations and external rewards. To ease the design and simulation of such environments this work introduces $\\texttt{APES}$, a highly customizable and open source package in Python to create 2D grid-world environments for "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1808.10692","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:06:45Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"8PbTuxGeb/Yv9W1rYz9CWAY6Svh7v+oQOjWO7aW9aB+cfOlGdQaEln9jVyXo8ETUsQ/CfUT4VWuwTVjOvjwMBg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-08T17:39:02.847792Z"},"content_sha256":"ddea745fa61fc82b333eb145eb5aad48a8ca5e3de141a159cfba43f1a6f2c5aa","schema_version":"1.0","event_id":"sha256:ddea745fa61fc82b333eb145eb5aad48a8ca5e3de141a159cfba43f1a6f2c5aa"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/Z3GJD6Y7J4QQRZTNWK7K5HYMUC/bundle.json","state_url":"https://pith.science/pith/Z3GJD6Y7J4QQRZTNWK7K5HYMUC/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/Z3GJD6Y7J4QQRZTNWK7K5HYMUC/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-08T17:39:02Z","links":{"resolver":"https://pith.science/pith/Z3GJD6Y7J4QQRZTNWK7K5HYMUC","bundle":"https://pith.science/pith/Z3GJD6Y7J4QQRZTNWK7K5HYMUC/bundle.json","state":"https://pith.science/pith/Z3GJD6Y7J4QQRZTNWK7K5HYMUC/state.json","well_known_bundle":"https://pith.science/.well-known/pith/Z3GJD6Y7J4QQRZTNWK7K5HYMUC/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2018:Z3GJD6Y7J4QQRZTNWK7K5HYMUC","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"2548f62e5e7c5c8d49f506d13600a5b18b3e691cf25c29137998d2a00d76af89","cross_cats_sorted":["stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-08-31T11:47:10Z","title_canon_sha256":"08e8cd7a8496a6189ed3b6e4812d056f9dc7ca959b5b3a50c6f78c604d67fb33"},"schema_version":"1.0","source":{"id":"1808.10692","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1808.10692","created_at":"2026-05-18T00:06:45Z"},{"alias_kind":"arxiv_version","alias_value":"1808.10692v1","created_at":"2026-05-18T00:06:45Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1808.10692","created_at":"2026-05-18T00:06:45Z"},{"alias_kind":"pith_short_12","alias_value":"Z3GJD6Y7J4QQ","created_at":"2026-05-18T12:33:04Z"},{"alias_kind":"pith_short_16","alias_value":"Z3GJD6Y7J4QQRZTN","created_at":"2026-05-18T12:33:04Z"},{"alias_kind":"pith_short_8","alias_value":"Z3GJD6Y7","created_at":"2026-05-18T12:33:04Z"}],"graph_snapshots":[{"event_id":"sha256:ddea745fa61fc82b333eb145eb5aad48a8ca5e3de141a159cfba43f1a6f2c5aa","target":"graph","created_at":"2026-05-18T00:06:45Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Assisted by neural networks, reinforcement learning agents have been able to solve increasingly complex tasks over the last years. The simulation environment in which the agents interact is an essential component in any reinforcement learning problem. The environment simulates the dynamics of the agents' world and hence provides feedback to their actions in terms of state observations and external rewards. To ease the design and simulation of such environments this work introduces $\\texttt{APES}$, a highly customizable and open source package in Python to create 2D grid-world environments for ","authors_text":"Aqeel Labash, Ardi Tampuu, Jaan Aru, Raul Vicente, Tambet Matiisen","cross_cats":["stat.ML"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-08-31T11:47:10Z","title":"APES: a Python toolbox for simulating reinforcement learning environments"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1808.10692","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:d41040b6999cfa324c29f0fd9c9d6730e07e1fcb789cf4b2c6ce247c6aa7eaa8","target":"record","created_at":"2026-05-18T00:06:45Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"2548f62e5e7c5c8d49f506d13600a5b18b3e691cf25c29137998d2a00d76af89","cross_cats_sorted":["stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-08-31T11:47:10Z","title_canon_sha256":"08e8cd7a8496a6189ed3b6e4812d056f9dc7ca959b5b3a50c6f78c604d67fb33"},"schema_version":"1.0","source":{"id":"1808.10692","kind":"arxiv","version":1}},"canonical_sha256":"cecc91fb1f4f2108e66db2beae9f0ca0afda8e7d66378f57a4eee39bdcc69cad","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"cecc91fb1f4f2108e66db2beae9f0ca0afda8e7d66378f57a4eee39bdcc69cad","first_computed_at":"2026-05-18T00:06:45.071487Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:06:45.071487Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"AlSVsBYCxyEadblv24EzNoCcqD5lUQ5/nucE8lCbh4ndUJafGvlnjJAWfOk0aPc7xci2/NMqo2mk5SAA2k4ACQ==","signature_status":"signed_v1","signed_at":"2026-05-18T00:06:45.072047Z","signed_message":"canonical_sha256_bytes"},"source_id":"1808.10692","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:d41040b6999cfa324c29f0fd9c9d6730e07e1fcb789cf4b2c6ce247c6aa7eaa8","sha256:ddea745fa61fc82b333eb145eb5aad48a8ca5e3de141a159cfba43f1a6f2c5aa"],"state_sha256":"f04292ca7be92db0beef5cd0454b3758674ba6afc32f5b6d89a73b9fe75ee862"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"qJclMuVYjEgA2Rh/WH6Uq0woHoPD9YIPB8iNIrPih6ub1RU+Ifr8Lded5aeJu3LxwguB4bFad7tX9Eea3/8NDw==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-08T17:39:02.851830Z","bundle_sha256":"2fec84de02308b6ac71182da47069937693a93cb092b2bbabd897450a11294f1"}}