{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2018:JAPNXGTIYUG4AVSYSMMKWQOEIQ","short_pith_number":"pith:JAPNXGTI","canonical_record":{"source":{"id":"1805.09045","kind":"arxiv","version":4},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-05-23T10:43:56Z","cross_cats_sorted":["cs.AI","stat.ML"],"title_canon_sha256":"30c88e75559c8e9c0793fe9c91aeff1255b99668d4101224713584f53c629dc9","abstract_canon_sha256":"4aa9d6f7f7ca4725729fb664b8d049a16b295cbb9c2ee80a00b06293fee42ba8"},"schema_version":"1.0"},"canonical_sha256":"481edb9a68c50dc056589318ab41c444065d876b8e0dd45dc31179ac4962e5cd","source":{"kind":"arxiv","id":"1805.09045","version":4},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1805.09045","created_at":"2026-05-17T23:48:17Z"},{"alias_kind":"arxiv_version","alias_value":"1805.09045v4","created_at":"2026-05-17T23:48:17Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1805.09045","created_at":"2026-05-17T23:48:17Z"},{"alias_kind":"pith_short_12","alias_value":"JAPNXGTIYUG4","created_at":"2026-05-18T12:32:31Z"},{"alias_kind":"pith_short_16","alias_value":"JAPNXGTIYUG4AVSY","created_at":"2026-05-18T12:32:31Z"},{"alias_kind":"pith_short_8","alias_value":"JAPNXGTI","created_at":"2026-05-18T12:32:31Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2018:JAPNXGTIYUG4AVSYSMMKWQOEIQ","target":"record","payload":{"canonical_record":{"source":{"id":"1805.09045","kind":"arxiv","version":4},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-05-23T10:43:56Z","cross_cats_sorted":["cs.AI","stat.ML"],"title_canon_sha256":"30c88e75559c8e9c0793fe9c91aeff1255b99668d4101224713584f53c629dc9","abstract_canon_sha256":"4aa9d6f7f7ca4725729fb664b8d049a16b295cbb9c2ee80a00b06293fee42ba8"},"schema_version":"1.0"},"canonical_sha256":"481edb9a68c50dc056589318ab41c444065d876b8e0dd45dc31179ac4962e5cd","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:48:17.344186Z","signature_b64":"pgpWg8tMehNENRa9obp1bp0d5owKAQMQaJPa8peAuucANz8TiLS5U+sWq+H8w52RJJWe1x6PfgWkoWdDiUHiAg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"481edb9a68c50dc056589318ab41c444065d876b8e0dd45dc31179ac4962e5cd","last_reissued_at":"2026-05-17T23:48:17.343504Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:48:17.343504Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1805.09045","source_version":4,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:48:17Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"S+kWOKXNUXnTU/WxltQN6WftlFcVaNzfMgb+0/4BsSTK54hL0FtBbtJXUzpKwZp7Axi12avWak/TY2QLqWO4CA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-10T20:01:42.826119Z"},"content_sha256":"ffda9559b2d46b0355d90a0a599f1ed06c9daf921cb1577f75d02f3a6a420997","schema_version":"1.0","event_id":"sha256:ffda9559b2d46b0355d90a0a599f1ed06c9daf921cb1577f75d02f3a6a420997"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2018:JAPNXGTIYUG4AVSYSMMKWQOEIQ","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"When Simple Exploration is Sample Efficient: Identifying Sufficient Conditions for Random Exploration to Yield PAC RL Algorithms","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","stat.ML"],"primary_cat":"cs.LG","authors_text":"Emma Brunskill, Yao Liu","submitted_at":"2018-05-23T10:43:56Z","abstract_excerpt":"Efficient exploration is one of the key challenges for reinforcement learning (RL) algorithms. Most traditional sample efficiency bounds require strategic exploration. Recently many deep RL algorithms with simple heuristic exploration strategies that have few formal guarantees, achieve surprising success in many domains. These results pose an important question about understanding these exploration strategies such as $e$-greedy, as well as understanding what characterize the difficulty of exploration in MDPs. In this work we propose problem specific sample complexity bounds of $Q$ learning wit"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1805.09045","kind":"arxiv","version":4},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:48:17Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"JB1R2z9AFy48dGOOJ/fo9hbkiXELzpXifDHg2eYYqtH4Pc0FwqkPJ9PkePIplJvwSmMWM6e3dTgMz6aFW1DXBQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-10T20:01:42.826477Z"},"content_sha256":"dcd7206b9ebb6f017211b16a481eaa18fe43451fe8c20e10d7c68232537b6a3b","schema_version":"1.0","event_id":"sha256:dcd7206b9ebb6f017211b16a481eaa18fe43451fe8c20e10d7c68232537b6a3b"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/JAPNXGTIYUG4AVSYSMMKWQOEIQ/bundle.json","state_url":"https://pith.science/pith/JAPNXGTIYUG4AVSYSMMKWQOEIQ/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/JAPNXGTIYUG4AVSYSMMKWQOEIQ/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-10T20:01:42Z","links":{"resolver":"https://pith.science/pith/JAPNXGTIYUG4AVSYSMMKWQOEIQ","bundle":"https://pith.science/pith/JAPNXGTIYUG4AVSYSMMKWQOEIQ/bundle.json","state":"https://pith.science/pith/JAPNXGTIYUG4AVSYSMMKWQOEIQ/state.json","well_known_bundle":"https://pith.science/.well-known/pith/JAPNXGTIYUG4AVSYSMMKWQOEIQ/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2018:JAPNXGTIYUG4AVSYSMMKWQOEIQ","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"4aa9d6f7f7ca4725729fb664b8d049a16b295cbb9c2ee80a00b06293fee42ba8","cross_cats_sorted":["cs.AI","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-05-23T10:43:56Z","title_canon_sha256":"30c88e75559c8e9c0793fe9c91aeff1255b99668d4101224713584f53c629dc9"},"schema_version":"1.0","source":{"id":"1805.09045","kind":"arxiv","version":4}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1805.09045","created_at":"2026-05-17T23:48:17Z"},{"alias_kind":"arxiv_version","alias_value":"1805.09045v4","created_at":"2026-05-17T23:48:17Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1805.09045","created_at":"2026-05-17T23:48:17Z"},{"alias_kind":"pith_short_12","alias_value":"JAPNXGTIYUG4","created_at":"2026-05-18T12:32:31Z"},{"alias_kind":"pith_short_16","alias_value":"JAPNXGTIYUG4AVSY","created_at":"2026-05-18T12:32:31Z"},{"alias_kind":"pith_short_8","alias_value":"JAPNXGTI","created_at":"2026-05-18T12:32:31Z"}],"graph_snapshots":[{"event_id":"sha256:dcd7206b9ebb6f017211b16a481eaa18fe43451fe8c20e10d7c68232537b6a3b","target":"graph","created_at":"2026-05-17T23:48:17Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Efficient exploration is one of the key challenges for reinforcement learning (RL) algorithms. Most traditional sample efficiency bounds require strategic exploration. Recently many deep RL algorithms with simple heuristic exploration strategies that have few formal guarantees, achieve surprising success in many domains. These results pose an important question about understanding these exploration strategies such as $e$-greedy, as well as understanding what characterize the difficulty of exploration in MDPs. In this work we propose problem specific sample complexity bounds of $Q$ learning wit","authors_text":"Emma Brunskill, Yao Liu","cross_cats":["cs.AI","stat.ML"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-05-23T10:43:56Z","title":"When Simple Exploration is Sample Efficient: Identifying Sufficient Conditions for Random Exploration to Yield PAC RL Algorithms"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1805.09045","kind":"arxiv","version":4},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:ffda9559b2d46b0355d90a0a599f1ed06c9daf921cb1577f75d02f3a6a420997","target":"record","created_at":"2026-05-17T23:48:17Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"4aa9d6f7f7ca4725729fb664b8d049a16b295cbb9c2ee80a00b06293fee42ba8","cross_cats_sorted":["cs.AI","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-05-23T10:43:56Z","title_canon_sha256":"30c88e75559c8e9c0793fe9c91aeff1255b99668d4101224713584f53c629dc9"},"schema_version":"1.0","source":{"id":"1805.09045","kind":"arxiv","version":4}},"canonical_sha256":"481edb9a68c50dc056589318ab41c444065d876b8e0dd45dc31179ac4962e5cd","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"481edb9a68c50dc056589318ab41c444065d876b8e0dd45dc31179ac4962e5cd","first_computed_at":"2026-05-17T23:48:17.343504Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:48:17.343504Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"pgpWg8tMehNENRa9obp1bp0d5owKAQMQaJPa8peAuucANz8TiLS5U+sWq+H8w52RJJWe1x6PfgWkoWdDiUHiAg==","signature_status":"signed_v1","signed_at":"2026-05-17T23:48:17.344186Z","signed_message":"canonical_sha256_bytes"},"source_id":"1805.09045","source_kind":"arxiv","source_version":4}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:ffda9559b2d46b0355d90a0a599f1ed06c9daf921cb1577f75d02f3a6a420997","sha256:dcd7206b9ebb6f017211b16a481eaa18fe43451fe8c20e10d7c68232537b6a3b"],"state_sha256":"7897d9fde36153d681de5f3eac7f3da061246d8d8074f6406d531ddd5b2f3ec7"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"Btw43tAcNApPvKdarZQbVYaHWC53Ek7YVo9D6LlihnI4AHoVV5SMrfctoeDa9cJIo7l/bcaJ9xk/A+qIKwn5Cg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-10T20:01:42.828434Z","bundle_sha256":"add1d932ccff7aaa9ec2f86386f7e8f0f6a225e1953573911e9c536c833f691d"}}