{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2016:Z3IYHDQEQBDDSEJDRN6ISZEAPP","short_pith_number":"pith:Z3IYHDQE","schema_version":"1.0","canonical_sha256":"ced1838e0480463911238b7c8964807bde37d67a8721804f662a15a22ab00a93","source":{"kind":"arxiv","id":"1602.02722","version":4},"attestation_state":"computed","paper":{"title":"PAC Reinforcement Learning with Rich Observations","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["stat.ML"],"primary_cat":"cs.LG","authors_text":"Akshay Krishnamurthy, Alekh Agarwal, John Langford","submitted_at":"2016-02-08T20:12:50Z","abstract_excerpt":"We propose and study a new model for reinforcement learning with rich observations, generalizing contextual bandits to sequential decision making. These models require an agent to take actions based on observations (features) with the goal of achieving long-term performance competitive with a large set of policies. To avoid barriers to sample-efficient learning associated with large observation spaces and general POMDPs, we focus on problems that can be summarized by a small number of hidden states and have long-term rewards that are predictable by a reactive function class. In this setting, w"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1602.02722","kind":"arxiv","version":4},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2016-02-08T20:12:50Z","cross_cats_sorted":["stat.ML"],"title_canon_sha256":"c35b1a96c0647abd5a38879d0340cfb18acc4cb99aeb7f1853a27097bef658ea","abstract_canon_sha256":"a9ef56be068ae8c8f5027803ffee692a750fff615f7826f8d2ff133614450880"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T01:01:00.315810Z","signature_b64":"FFNCqxXSqvy066XysdJ62FbGBqM3a6Fl16r2clngmkw/c2++yuxBWv24//FHccwh2taxvLSOIa81IMumM4KxDQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"ced1838e0480463911238b7c8964807bde37d67a8721804f662a15a22ab00a93","last_reissued_at":"2026-05-18T01:01:00.314933Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T01:01:00.314933Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"PAC Reinforcement Learning with Rich Observations","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["stat.ML"],"primary_cat":"cs.LG","authors_text":"Akshay Krishnamurthy, Alekh Agarwal, John Langford","submitted_at":"2016-02-08T20:12:50Z","abstract_excerpt":"We propose and study a new model for reinforcement learning with rich observations, generalizing contextual bandits to sequential decision making. These models require an agent to take actions based on observations (features) with the goal of achieving long-term performance competitive with a large set of policies. To avoid barriers to sample-efficient learning associated with large observation spaces and general POMDPs, we focus on problems that can be summarized by a small number of hidden states and have long-term rewards that are predictable by a reactive function class. In this setting, w"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1602.02722","kind":"arxiv","version":4},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1602.02722","created_at":"2026-05-18T01:01:00.315066+00:00"},{"alias_kind":"arxiv_version","alias_value":"1602.02722v4","created_at":"2026-05-18T01:01:00.315066+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1602.02722","created_at":"2026-05-18T01:01:00.315066+00:00"},{"alias_kind":"pith_short_12","alias_value":"Z3IYHDQEQBDD","created_at":"2026-05-18T12:30:53.716459+00:00"},{"alias_kind":"pith_short_16","alias_value":"Z3IYHDQEQBDDSEJD","created_at":"2026-05-18T12:30:53.716459+00:00"},{"alias_kind":"pith_short_8","alias_value":"Z3IYHDQE","created_at":"2026-05-18T12:30:53.716459+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/Z3IYHDQEQBDDSEJDRN6ISZEAPP","json":"https://pith.science/pith/Z3IYHDQEQBDDSEJDRN6ISZEAPP.json","graph_json":"https://pith.science/api/pith-number/Z3IYHDQEQBDDSEJDRN6ISZEAPP/graph.json","events_json":"https://pith.science/api/pith-number/Z3IYHDQEQBDDSEJDRN6ISZEAPP/events.json","paper":"https://pith.science/paper/Z3IYHDQE"},"agent_actions":{"view_html":"https://pith.science/pith/Z3IYHDQEQBDDSEJDRN6ISZEAPP","download_json":"https://pith.science/pith/Z3IYHDQEQBDDSEJDRN6ISZEAPP.json","view_paper":"https://pith.science/paper/Z3IYHDQE","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1602.02722&json=true","fetch_graph":"https://pith.science/api/pith-number/Z3IYHDQEQBDDSEJDRN6ISZEAPP/graph.json","fetch_events":"https://pith.science/api/pith-number/Z3IYHDQEQBDDSEJDRN6ISZEAPP/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/Z3IYHDQEQBDDSEJDRN6ISZEAPP/action/timestamp_anchor","attest_storage":"https://pith.science/pith/Z3IYHDQEQBDDSEJDRN6ISZEAPP/action/storage_attestation","attest_author":"https://pith.science/pith/Z3IYHDQEQBDDSEJDRN6ISZEAPP/action/author_attestation","sign_citation":"https://pith.science/pith/Z3IYHDQEQBDDSEJDRN6ISZEAPP/action/citation_signature","submit_replication":"https://pith.science/pith/Z3IYHDQEQBDDSEJDRN6ISZEAPP/action/replication_record"}},"created_at":"2026-05-18T01:01:00.315066+00:00","updated_at":"2026-05-18T01:01:00.315066+00:00"}