{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2018:EWYS4RZ32GW3XPXNYDBJTWQXHB","short_pith_number":"pith:EWYS4RZ3","canonical_record":{"source":{"id":"1811.08540","kind":"arxiv","version":3},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-11-21T01:48:17Z","cross_cats_sorted":["stat.ML"],"title_canon_sha256":"1aa070b1cf532d766c5e9c46c46660952a9fc7758892ddec042e69dce3bd1639","abstract_canon_sha256":"1b113ceba0e118bbb8aaf20dcc5a6afe7358e88218f48e57c38492c2d3266797"},"schema_version":"1.0"},"canonical_sha256":"25b12e473bd1adbbbeedc0c299da173840bbb937d22f6f29d1885a983205a16f","source":{"kind":"arxiv","id":"1811.08540","version":3},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1811.08540","created_at":"2026-05-17T23:44:42Z"},{"alias_kind":"arxiv_version","alias_value":"1811.08540v3","created_at":"2026-05-17T23:44:42Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1811.08540","created_at":"2026-05-17T23:44:42Z"},{"alias_kind":"pith_short_12","alias_value":"EWYS4RZ32GW3","created_at":"2026-05-18T12:32:22Z"},{"alias_kind":"pith_short_16","alias_value":"EWYS4RZ32GW3XPXN","created_at":"2026-05-18T12:32:22Z"},{"alias_kind":"pith_short_8","alias_value":"EWYS4RZ3","created_at":"2026-05-18T12:32:22Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2018:EWYS4RZ32GW3XPXNYDBJTWQXHB","target":"record","payload":{"canonical_record":{"source":{"id":"1811.08540","kind":"arxiv","version":3},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-11-21T01:48:17Z","cross_cats_sorted":["stat.ML"],"title_canon_sha256":"1aa070b1cf532d766c5e9c46c46660952a9fc7758892ddec042e69dce3bd1639","abstract_canon_sha256":"1b113ceba0e118bbb8aaf20dcc5a6afe7358e88218f48e57c38492c2d3266797"},"schema_version":"1.0"},"canonical_sha256":"25b12e473bd1adbbbeedc0c299da173840bbb937d22f6f29d1885a983205a16f","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:44:42.842096Z","signature_b64":"7dQ+HsAqgUG9Z8eEzxLaMAcb31DQ4ga643P036j0cjTEYpkhajdUUJUZsOyPTZJo5aAEinA1POyoX/7Uy2dKDA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"25b12e473bd1adbbbeedc0c299da173840bbb937d22f6f29d1885a983205a16f","last_reissued_at":"2026-05-17T23:44:42.841647Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:44:42.841647Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1811.08540","source_version":3,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:44:42Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"j2FK/EcAZZzTngxZAHijH+y9SdGPRqnXrLtN6IvS9a0DxLDsvqsod8MbfvLNvazFWGbcm5nKC664R8FZs0xWDQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-28T06:59:06.019579Z"},"content_sha256":"ea5aa1957930f24f2157e22f9ea3beb6ec3465b1f9b3d0d4549c8e3ce0a82a12","schema_version":"1.0","event_id":"sha256:ea5aa1957930f24f2157e22f9ea3beb6ec3465b1f9b3d0d4549c8e3ce0a82a12"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2018:EWYS4RZ32GW3XPXNYDBJTWQXHB","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Model-based RL in Contextual Decision Processes: PAC bounds and Exponential Improvements over Model-free Approaches","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["stat.ML"],"primary_cat":"cs.LG","authors_text":"Akshay Krishnamurthy, Alekh Agarwal, John Langford, Nan Jiang, Wen Sun","submitted_at":"2018-11-21T01:48:17Z","abstract_excerpt":"We study the sample complexity of model-based reinforcement learning (henceforth RL) in general contextual decision processes that require strategic exploration to find a near-optimal policy. We design new algorithms for RL with a generic model class and analyze their statistical properties. Our algorithms have sample complexity governed by a new structural parameter called the witness rank, which we show to be small in several settings of interest, including factored MDPs. We also show that the witness rank is never larger than the recently proposed Bellman rank parameter governing the sample"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1811.08540","kind":"arxiv","version":3},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:44:42Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"//UA0pfjFAJt/VGYbboMkA2F3aDmy9yUh8F1SL+Xmnp12CCXQ9dIKaTYdkH4u+Wv25SB1NdOZvafuyyX0QH/CA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-28T06:59:06.019923Z"},"content_sha256":"bf6786a89be95a76df1e89e85347b35d553bdfec32a0f6b0e26fdbb97940e777","schema_version":"1.0","event_id":"sha256:bf6786a89be95a76df1e89e85347b35d553bdfec32a0f6b0e26fdbb97940e777"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/EWYS4RZ32GW3XPXNYDBJTWQXHB/bundle.json","state_url":"https://pith.science/pith/EWYS4RZ32GW3XPXNYDBJTWQXHB/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/EWYS4RZ32GW3XPXNYDBJTWQXHB/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-28T06:59:06Z","links":{"resolver":"https://pith.science/pith/EWYS4RZ32GW3XPXNYDBJTWQXHB","bundle":"https://pith.science/pith/EWYS4RZ32GW3XPXNYDBJTWQXHB/bundle.json","state":"https://pith.science/pith/EWYS4RZ32GW3XPXNYDBJTWQXHB/state.json","well_known_bundle":"https://pith.science/.well-known/pith/EWYS4RZ32GW3XPXNYDBJTWQXHB/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2018:EWYS4RZ32GW3XPXNYDBJTWQXHB","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"1b113ceba0e118bbb8aaf20dcc5a6afe7358e88218f48e57c38492c2d3266797","cross_cats_sorted":["stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-11-21T01:48:17Z","title_canon_sha256":"1aa070b1cf532d766c5e9c46c46660952a9fc7758892ddec042e69dce3bd1639"},"schema_version":"1.0","source":{"id":"1811.08540","kind":"arxiv","version":3}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1811.08540","created_at":"2026-05-17T23:44:42Z"},{"alias_kind":"arxiv_version","alias_value":"1811.08540v3","created_at":"2026-05-17T23:44:42Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1811.08540","created_at":"2026-05-17T23:44:42Z"},{"alias_kind":"pith_short_12","alias_value":"EWYS4RZ32GW3","created_at":"2026-05-18T12:32:22Z"},{"alias_kind":"pith_short_16","alias_value":"EWYS4RZ32GW3XPXN","created_at":"2026-05-18T12:32:22Z"},{"alias_kind":"pith_short_8","alias_value":"EWYS4RZ3","created_at":"2026-05-18T12:32:22Z"}],"graph_snapshots":[{"event_id":"sha256:bf6786a89be95a76df1e89e85347b35d553bdfec32a0f6b0e26fdbb97940e777","target":"graph","created_at":"2026-05-17T23:44:42Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"We study the sample complexity of model-based reinforcement learning (henceforth RL) in general contextual decision processes that require strategic exploration to find a near-optimal policy. We design new algorithms for RL with a generic model class and analyze their statistical properties. Our algorithms have sample complexity governed by a new structural parameter called the witness rank, which we show to be small in several settings of interest, including factored MDPs. We also show that the witness rank is never larger than the recently proposed Bellman rank parameter governing the sample","authors_text":"Akshay Krishnamurthy, Alekh Agarwal, John Langford, Nan Jiang, Wen Sun","cross_cats":["stat.ML"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-11-21T01:48:17Z","title":"Model-based RL in Contextual Decision Processes: PAC bounds and Exponential Improvements over Model-free Approaches"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1811.08540","kind":"arxiv","version":3},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:ea5aa1957930f24f2157e22f9ea3beb6ec3465b1f9b3d0d4549c8e3ce0a82a12","target":"record","created_at":"2026-05-17T23:44:42Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"1b113ceba0e118bbb8aaf20dcc5a6afe7358e88218f48e57c38492c2d3266797","cross_cats_sorted":["stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-11-21T01:48:17Z","title_canon_sha256":"1aa070b1cf532d766c5e9c46c46660952a9fc7758892ddec042e69dce3bd1639"},"schema_version":"1.0","source":{"id":"1811.08540","kind":"arxiv","version":3}},"canonical_sha256":"25b12e473bd1adbbbeedc0c299da173840bbb937d22f6f29d1885a983205a16f","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"25b12e473bd1adbbbeedc0c299da173840bbb937d22f6f29d1885a983205a16f","first_computed_at":"2026-05-17T23:44:42.841647Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:44:42.841647Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"7dQ+HsAqgUG9Z8eEzxLaMAcb31DQ4ga643P036j0cjTEYpkhajdUUJUZsOyPTZJo5aAEinA1POyoX/7Uy2dKDA==","signature_status":"signed_v1","signed_at":"2026-05-17T23:44:42.842096Z","signed_message":"canonical_sha256_bytes"},"source_id":"1811.08540","source_kind":"arxiv","source_version":3}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:ea5aa1957930f24f2157e22f9ea3beb6ec3465b1f9b3d0d4549c8e3ce0a82a12","sha256:bf6786a89be95a76df1e89e85347b35d553bdfec32a0f6b0e26fdbb97940e777"],"state_sha256":"80cd25f81afd25a0b4a6e911f44a01fbd9062650cc700802ccd4187a9adec143"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"N8gMCbFR+MaBEB29SHqSG+fmBlKp9n1MzFQ3DioxS02pOXCuoOEtDOviOfvlGV6rtnZrAMgOE+AfIDmb+JF4Ag==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-28T06:59:06.021816Z","bundle_sha256":"3cbb3fa94070f9fd25fd11c2d11d0badff768d799ffb2a80113f7fea6f4a9f24"}}