{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:H2SJ2XXZYBWFBUGQ6IZCTHSCWH","short_pith_number":"pith:H2SJ2XXZ","schema_version":"1.0","canonical_sha256":"3ea49d5ef9c06c50d0d0f232299e42b1db1591a4137123a023f3727f40fcf9b3","source":{"kind":"arxiv","id":"2603.23461","version":2},"attestation_state":"computed","paper":{"title":"End-to-End Efficient RL for Linear Bellman Complete MDPs with Deterministic Transitions","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.LG","authors_text":"Alexander Rakhlin, Nneka Okolo, Zakaria Mhammedi","submitted_at":"2026-03-24T17:32:29Z","abstract_excerpt":"We study reinforcement learning (RL) with linear function approximation in Markov Decision Processes (MDPs) satisfying \\emph{linear Bellman completeness} -- a fundamental setting where the Bellman backup of any linear value function remains linear. While statistically tractable, prior computationally efficient algorithms are either limited to small action spaces or require strong oracle assumptions over the feature space. We provide a computationally efficient algorithm for linear Bellman complete MDPs with \\emph{deterministic transitions}, stochastic initial states, and stochastic rewards. Fo"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2603.23461","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-03-24T17:32:29Z","cross_cats_sorted":[],"title_canon_sha256":"d7c3fddce558c98d3a038a74daf9a6dc9f160a761f609648c63424181b7a1c58","abstract_canon_sha256":"9877ff221b43811ec9def19602d811a6355f6ffd8653d1045b6c9f80570caea6"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-07-01T01:18:24.547845Z","signature_b64":"UaaPOABxV0nNI3H5AqUshOJ3L5U6W0HOe1nPlGCoDEcNZSzYOdraDUHeeI1CXoBhElLYp6Syw8rXA+0VyUF+AA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"3ea49d5ef9c06c50d0d0f232299e42b1db1591a4137123a023f3727f40fcf9b3","last_reissued_at":"2026-07-01T01:18:24.547325Z","signature_status":"signed_v1","first_computed_at":"2026-07-01T01:18:24.547325Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"End-to-End Efficient RL for Linear Bellman Complete MDPs with Deterministic Transitions","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.LG","authors_text":"Alexander Rakhlin, Nneka Okolo, Zakaria Mhammedi","submitted_at":"2026-03-24T17:32:29Z","abstract_excerpt":"We study reinforcement learning (RL) with linear function approximation in Markov Decision Processes (MDPs) satisfying \\emph{linear Bellman completeness} -- a fundamental setting where the Bellman backup of any linear value function remains linear. While statistically tractable, prior computationally efficient algorithms are either limited to small action spaces or require strong oracle assumptions over the feature space. We provide a computationally efficient algorithm for linear Bellman complete MDPs with \\emph{deterministic transitions}, stochastic initial states, and stochastic rewards. Fo"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2603.23461","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2603.23461/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2603.23461","created_at":"2026-07-01T01:18:24.547398+00:00"},{"alias_kind":"arxiv_version","alias_value":"2603.23461v2","created_at":"2026-07-01T01:18:24.547398+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2603.23461","created_at":"2026-07-01T01:18:24.547398+00:00"},{"alias_kind":"pith_short_12","alias_value":"H2SJ2XXZYBWF","created_at":"2026-07-01T01:18:24.547398+00:00"},{"alias_kind":"pith_short_16","alias_value":"H2SJ2XXZYBWFBUGQ","created_at":"2026-07-01T01:18:24.547398+00:00"},{"alias_kind":"pith_short_8","alias_value":"H2SJ2XXZ","created_at":"2026-07-01T01:18:24.547398+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/H2SJ2XXZYBWFBUGQ6IZCTHSCWH","json":"https://pith.science/pith/H2SJ2XXZYBWFBUGQ6IZCTHSCWH.json","graph_json":"https://pith.science/api/pith-number/H2SJ2XXZYBWFBUGQ6IZCTHSCWH/graph.json","events_json":"https://pith.science/api/pith-number/H2SJ2XXZYBWFBUGQ6IZCTHSCWH/events.json","paper":"https://pith.science/paper/H2SJ2XXZ"},"agent_actions":{"view_html":"https://pith.science/pith/H2SJ2XXZYBWFBUGQ6IZCTHSCWH","download_json":"https://pith.science/pith/H2SJ2XXZYBWFBUGQ6IZCTHSCWH.json","view_paper":"https://pith.science/paper/H2SJ2XXZ","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2603.23461&json=true","fetch_graph":"https://pith.science/api/pith-number/H2SJ2XXZYBWFBUGQ6IZCTHSCWH/graph.json","fetch_events":"https://pith.science/api/pith-number/H2SJ2XXZYBWFBUGQ6IZCTHSCWH/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/H2SJ2XXZYBWFBUGQ6IZCTHSCWH/action/timestamp_anchor","attest_storage":"https://pith.science/pith/H2SJ2XXZYBWFBUGQ6IZCTHSCWH/action/storage_attestation","attest_author":"https://pith.science/pith/H2SJ2XXZYBWFBUGQ6IZCTHSCWH/action/author_attestation","sign_citation":"https://pith.science/pith/H2SJ2XXZYBWFBUGQ6IZCTHSCWH/action/citation_signature","submit_replication":"https://pith.science/pith/H2SJ2XXZYBWFBUGQ6IZCTHSCWH/action/replication_record"}},"created_at":"2026-07-01T01:18:24.547398+00:00","updated_at":"2026-07-01T01:18:24.547398+00:00"}