{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2018:UOJRCQEZHPEX3MA3WYJHTM3MAH","short_pith_number":"pith:UOJRCQEZ","canonical_record":{"source":{"id":"1809.03447","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-09-10T16:36:22Z","cross_cats_sorted":["stat.ML"],"title_canon_sha256":"7eeb0372420b8423c6ff56ace01a14a2af016ee82b11bc1443857a0ae95f4578","abstract_canon_sha256":"45e28129ee8f49d5f4ea440ac041f17603491fb4137f386cac63b5c243ee35e1"},"schema_version":"1.0"},"canonical_sha256":"a3931140993bc97db01bb61279b36c01c53b5676209aca041fa4568fa4a97442","source":{"kind":"arxiv","id":"1809.03447","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1809.03447","created_at":"2026-05-18T00:06:06Z"},{"alias_kind":"arxiv_version","alias_value":"1809.03447v1","created_at":"2026-05-18T00:06:06Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1809.03447","created_at":"2026-05-18T00:06:06Z"},{"alias_kind":"pith_short_12","alias_value":"UOJRCQEZHPEX","created_at":"2026-05-18T12:32:56Z"},{"alias_kind":"pith_short_16","alias_value":"UOJRCQEZHPEX3MA3","created_at":"2026-05-18T12:32:56Z"},{"alias_kind":"pith_short_8","alias_value":"UOJRCQEZ","created_at":"2026-05-18T12:32:56Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2018:UOJRCQEZHPEX3MA3WYJHTM3MAH","target":"record","payload":{"canonical_record":{"source":{"id":"1809.03447","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-09-10T16:36:22Z","cross_cats_sorted":["stat.ML"],"title_canon_sha256":"7eeb0372420b8423c6ff56ace01a14a2af016ee82b11bc1443857a0ae95f4578","abstract_canon_sha256":"45e28129ee8f49d5f4ea440ac041f17603491fb4137f386cac63b5c243ee35e1"},"schema_version":"1.0"},"canonical_sha256":"a3931140993bc97db01bb61279b36c01c53b5676209aca041fa4568fa4a97442","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:06:06.873369Z","signature_b64":"4g2A8/Jo93w93xBycy9TI8Gj8est2lG0dFYkoNbZNTiNjf7/lWXlGxmZ/FCnwnFTXNPsaBohDNOogD/taLkrBQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"a3931140993bc97db01bb61279b36c01c53b5676209aca041fa4568fa4a97442","last_reissued_at":"2026-05-18T00:06:06.872896Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:06:06.872896Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1809.03447","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:06:06Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"Oe3ukXjxL9cUb6QHvMT1WqtW21JocPhxMa0329z76mpusU5WAypMlhWzPG/rfUqURDUaWPLd0qai3hmwmx4NCQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-31T08:20:39.196558Z"},"content_sha256":"5f0ed377bafb45105f7177098a3f58f5a2f148475f7546ecd7b5bfa302a633d6","schema_version":"1.0","event_id":"sha256:5f0ed377bafb45105f7177098a3f58f5a2f148475f7546ecd7b5bfa302a633d6"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2018:UOJRCQEZHPEX3MA3WYJHTM3MAH","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Expert-augmented actor-critic for ViZDoom and Montezumas Revenge","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["stat.ML"],"primary_cat":"cs.LG","authors_text":"Henryk Michalewski, Micha{\\l} Garmulewicz, Piotr Mi{\\l}o\\'s","submitted_at":"2018-09-10T16:36:22Z","abstract_excerpt":"We propose an expert-augmented actor-critic algorithm, which we evaluate on two environments with sparse rewards: Montezumas Revenge and a demanding maze from the ViZDoom suite. In the case of Montezumas Revenge, an agent trained with our method achieves very good results consistently scoring above 27,000 points (in many experiments beating the first world). With an appropriate choice of hyperparameters, our algorithm surpasses the performance of the expert data. In a number of experiments, we have observed an unreported bug in Montezumas Revenge which allowed the agent to score more than 800,"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1809.03447","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:06:06Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"MuVGiNxMEfW9vx5NLYwc2Auw2ZNsM9nQblp1QfolahsCpuK0CFG3i9hl53pDbZIkxS2eqbXxA/c/DvtAkZjnBA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-31T08:20:39.197184Z"},"content_sha256":"42d5b6382045bf4ad74b2fa40465c1c68a56599277e0e72a68cbd2b17793d20b","schema_version":"1.0","event_id":"sha256:42d5b6382045bf4ad74b2fa40465c1c68a56599277e0e72a68cbd2b17793d20b"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/UOJRCQEZHPEX3MA3WYJHTM3MAH/bundle.json","state_url":"https://pith.science/pith/UOJRCQEZHPEX3MA3WYJHTM3MAH/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/UOJRCQEZHPEX3MA3WYJHTM3MAH/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-31T08:20:39Z","links":{"resolver":"https://pith.science/pith/UOJRCQEZHPEX3MA3WYJHTM3MAH","bundle":"https://pith.science/pith/UOJRCQEZHPEX3MA3WYJHTM3MAH/bundle.json","state":"https://pith.science/pith/UOJRCQEZHPEX3MA3WYJHTM3MAH/state.json","well_known_bundle":"https://pith.science/.well-known/pith/UOJRCQEZHPEX3MA3WYJHTM3MAH/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2018:UOJRCQEZHPEX3MA3WYJHTM3MAH","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"45e28129ee8f49d5f4ea440ac041f17603491fb4137f386cac63b5c243ee35e1","cross_cats_sorted":["stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-09-10T16:36:22Z","title_canon_sha256":"7eeb0372420b8423c6ff56ace01a14a2af016ee82b11bc1443857a0ae95f4578"},"schema_version":"1.0","source":{"id":"1809.03447","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1809.03447","created_at":"2026-05-18T00:06:06Z"},{"alias_kind":"arxiv_version","alias_value":"1809.03447v1","created_at":"2026-05-18T00:06:06Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1809.03447","created_at":"2026-05-18T00:06:06Z"},{"alias_kind":"pith_short_12","alias_value":"UOJRCQEZHPEX","created_at":"2026-05-18T12:32:56Z"},{"alias_kind":"pith_short_16","alias_value":"UOJRCQEZHPEX3MA3","created_at":"2026-05-18T12:32:56Z"},{"alias_kind":"pith_short_8","alias_value":"UOJRCQEZ","created_at":"2026-05-18T12:32:56Z"}],"graph_snapshots":[{"event_id":"sha256:42d5b6382045bf4ad74b2fa40465c1c68a56599277e0e72a68cbd2b17793d20b","target":"graph","created_at":"2026-05-18T00:06:06Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"We propose an expert-augmented actor-critic algorithm, which we evaluate on two environments with sparse rewards: Montezumas Revenge and a demanding maze from the ViZDoom suite. In the case of Montezumas Revenge, an agent trained with our method achieves very good results consistently scoring above 27,000 points (in many experiments beating the first world). With an appropriate choice of hyperparameters, our algorithm surpasses the performance of the expert data. In a number of experiments, we have observed an unreported bug in Montezumas Revenge which allowed the agent to score more than 800,","authors_text":"Henryk Michalewski, Micha{\\l} Garmulewicz, Piotr Mi{\\l}o\\'s","cross_cats":["stat.ML"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-09-10T16:36:22Z","title":"Expert-augmented actor-critic for ViZDoom and Montezumas Revenge"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1809.03447","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:5f0ed377bafb45105f7177098a3f58f5a2f148475f7546ecd7b5bfa302a633d6","target":"record","created_at":"2026-05-18T00:06:06Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"45e28129ee8f49d5f4ea440ac041f17603491fb4137f386cac63b5c243ee35e1","cross_cats_sorted":["stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-09-10T16:36:22Z","title_canon_sha256":"7eeb0372420b8423c6ff56ace01a14a2af016ee82b11bc1443857a0ae95f4578"},"schema_version":"1.0","source":{"id":"1809.03447","kind":"arxiv","version":1}},"canonical_sha256":"a3931140993bc97db01bb61279b36c01c53b5676209aca041fa4568fa4a97442","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"a3931140993bc97db01bb61279b36c01c53b5676209aca041fa4568fa4a97442","first_computed_at":"2026-05-18T00:06:06.872896Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:06:06.872896Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"4g2A8/Jo93w93xBycy9TI8Gj8est2lG0dFYkoNbZNTiNjf7/lWXlGxmZ/FCnwnFTXNPsaBohDNOogD/taLkrBQ==","signature_status":"signed_v1","signed_at":"2026-05-18T00:06:06.873369Z","signed_message":"canonical_sha256_bytes"},"source_id":"1809.03447","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:5f0ed377bafb45105f7177098a3f58f5a2f148475f7546ecd7b5bfa302a633d6","sha256:42d5b6382045bf4ad74b2fa40465c1c68a56599277e0e72a68cbd2b17793d20b"],"state_sha256":"0e0cde170bc24b1bb46b45c87636f4c80a559d33e2afd8e38e3968720828301e"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"Elykrwbslym0b56O9IdY4xD+CPpTOTgvFPkT4Te64ECXlOF450YrIHTiXjz0O38wjJG3VjzknOUhCIZ30poJCQ==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-31T08:20:39.200147Z","bundle_sha256":"718de8b7d1e099f6b44386eb24814075ed7fd5ac6910a1867596eb45c7960bab"}}