{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2024:Y7EL4TKGLBJDBC6DI4TEFCJTT5","short_pith_number":"pith:Y7EL4TKG","canonical_record":{"source":{"id":"2403.17091","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2024-03-25T18:28:45Z","cross_cats_sorted":["cs.AI","stat.ML"],"title_canon_sha256":"8270f7684b517635a7296936bc0f86d589db4815bba376df7e8646a60e5094b3","abstract_canon_sha256":"6066ffed52ad3990da8b62f012a43a597e43ff402b921ab072423fed48aabbc6"},"schema_version":"1.0"},"canonical_sha256":"c7c8be4d465852308bc347264289339f515e2a73362e09887485d3a9c4c61517","source":{"kind":"arxiv","id":"2403.17091","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2403.17091","created_at":"2026-07-05T08:00:45Z"},{"alias_kind":"arxiv_version","alias_value":"2403.17091v1","created_at":"2026-07-05T08:00:45Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2403.17091","created_at":"2026-07-05T08:00:45Z"},{"alias_kind":"pith_short_12","alias_value":"Y7EL4TKGLBJD","created_at":"2026-07-05T08:00:45Z"},{"alias_kind":"pith_short_16","alias_value":"Y7EL4TKGLBJDBC6D","created_at":"2026-07-05T08:00:45Z"},{"alias_kind":"pith_short_8","alias_value":"Y7EL4TKG","created_at":"2026-07-05T08:00:45Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2024:Y7EL4TKGLBJDBC6DI4TEFCJTT5","target":"record","payload":{"canonical_record":{"source":{"id":"2403.17091","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2024-03-25T18:28:45Z","cross_cats_sorted":["cs.AI","stat.ML"],"title_canon_sha256":"8270f7684b517635a7296936bc0f86d589db4815bba376df7e8646a60e5094b3","abstract_canon_sha256":"6066ffed52ad3990da8b62f012a43a597e43ff402b921ab072423fed48aabbc6"},"schema_version":"1.0"},"canonical_sha256":"c7c8be4d465852308bc347264289339f515e2a73362e09887485d3a9c4c61517","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-07-05T08:00:45.029824Z","signature_b64":"0XOGGcf4kbr4R5kTfLsKDxC8xQMAUR2AXffWdHFEsG2vQYYAF5BaYuyQLzEWVGnSfUfjO0RNFOulXjKcPVtzAg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"c7c8be4d465852308bc347264289339f515e2a73362e09887485d3a9c4c61517","last_reissued_at":"2026-07-05T08:00:45.029177Z","signature_status":"signed_v1","first_computed_at":"2026-07-05T08:00:45.029177Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2403.17091","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-07-05T08:00:45Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"SVKGKs7F5H7QX3NYFVR5VPmRBUWyarCXTopeNyS5ArHrQw7L6Ck476rlvDON6p35BFHC4vo0vuXPzMkBhf38AA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-07-05T14:31:35.339219Z"},"content_sha256":"64bbcddc49fc044d56e36bd7bdd561f2e4d0f9a88b055be68e2b935b98e0c202","schema_version":"1.0","event_id":"sha256:64bbcddc49fc044d56e36bd7bdd561f2e4d0f9a88b055be68e2b935b98e0c202"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2024:Y7EL4TKGLBJDBC6DI4TEFCJTT5","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Offline Reinforcement Learning: Role of State Aggregation and Trajectory Data","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.AI","stat.ML"],"primary_cat":"cs.LG","authors_text":"Alexander Rakhlin, Ayush Sekhari, Chen-Yu Wei, Zeyu Jia","submitted_at":"2024-03-25T18:28:45Z","abstract_excerpt":"We revisit the problem of offline reinforcement learning with value function realizability but without Bellman completeness. Previous work by Xie and Jiang (2021) and Foster et al. (2022) left open the question whether a bounded concentrability coefficient along with trajectory-based offline data admits a polynomial sample complexity. In this work, we provide a negative answer to this question for the task of offline policy evaluation. In addition to addressing this question, we provide a rather complete picture for offline policy evaluation with only value function realizability. Our primary "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2403.17091","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2403.17091/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-07-05T08:00:45Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"7VRGppt08FDDyFspkvmrBX5NeB46G7AcdM2bVwd5T5nOKJa2s0W9qvd7K5hWEyHYJx/BKKPav89g+8ZLoT5ZCA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-07-05T14:31:35.339930Z"},"content_sha256":"6912a441bb472d3a08ba5579d5195cd0680b532a25dbed325afb5ae70ef37808","schema_version":"1.0","event_id":"sha256:6912a441bb472d3a08ba5579d5195cd0680b532a25dbed325afb5ae70ef37808"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/Y7EL4TKGLBJDBC6DI4TEFCJTT5/bundle.json","state_url":"https://pith.science/pith/Y7EL4TKGLBJDBC6DI4TEFCJTT5/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/Y7EL4TKGLBJDBC6DI4TEFCJTT5/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-07-05T14:31:35Z","links":{"resolver":"https://pith.science/pith/Y7EL4TKGLBJDBC6DI4TEFCJTT5","bundle":"https://pith.science/pith/Y7EL4TKGLBJDBC6DI4TEFCJTT5/bundle.json","state":"https://pith.science/pith/Y7EL4TKGLBJDBC6DI4TEFCJTT5/state.json","well_known_bundle":"https://pith.science/.well-known/pith/Y7EL4TKGLBJDBC6DI4TEFCJTT5/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2024:Y7EL4TKGLBJDBC6DI4TEFCJTT5","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"6066ffed52ad3990da8b62f012a43a597e43ff402b921ab072423fed48aabbc6","cross_cats_sorted":["cs.AI","stat.ML"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2024-03-25T18:28:45Z","title_canon_sha256":"8270f7684b517635a7296936bc0f86d589db4815bba376df7e8646a60e5094b3"},"schema_version":"1.0","source":{"id":"2403.17091","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2403.17091","created_at":"2026-07-05T08:00:45Z"},{"alias_kind":"arxiv_version","alias_value":"2403.17091v1","created_at":"2026-07-05T08:00:45Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2403.17091","created_at":"2026-07-05T08:00:45Z"},{"alias_kind":"pith_short_12","alias_value":"Y7EL4TKGLBJD","created_at":"2026-07-05T08:00:45Z"},{"alias_kind":"pith_short_16","alias_value":"Y7EL4TKGLBJDBC6D","created_at":"2026-07-05T08:00:45Z"},{"alias_kind":"pith_short_8","alias_value":"Y7EL4TKG","created_at":"2026-07-05T08:00:45Z"}],"graph_snapshots":[{"event_id":"sha256:6912a441bb472d3a08ba5579d5195cd0680b532a25dbed325afb5ae70ef37808","target":"graph","created_at":"2026-07-05T08:00:45Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2403.17091/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"We revisit the problem of offline reinforcement learning with value function realizability but without Bellman completeness. Previous work by Xie and Jiang (2021) and Foster et al. (2022) left open the question whether a bounded concentrability coefficient along with trajectory-based offline data admits a polynomial sample complexity. In this work, we provide a negative answer to this question for the task of offline policy evaluation. In addition to addressing this question, we provide a rather complete picture for offline policy evaluation with only value function realizability. Our primary ","authors_text":"Alexander Rakhlin, Ayush Sekhari, Chen-Yu Wei, Zeyu Jia","cross_cats":["cs.AI","stat.ML"],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2024-03-25T18:28:45Z","title":"Offline Reinforcement Learning: Role of State Aggregation and Trajectory Data"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2403.17091","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:64bbcddc49fc044d56e36bd7bdd561f2e4d0f9a88b055be68e2b935b98e0c202","target":"record","created_at":"2026-07-05T08:00:45Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"6066ffed52ad3990da8b62f012a43a597e43ff402b921ab072423fed48aabbc6","cross_cats_sorted":["cs.AI","stat.ML"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2024-03-25T18:28:45Z","title_canon_sha256":"8270f7684b517635a7296936bc0f86d589db4815bba376df7e8646a60e5094b3"},"schema_version":"1.0","source":{"id":"2403.17091","kind":"arxiv","version":1}},"canonical_sha256":"c7c8be4d465852308bc347264289339f515e2a73362e09887485d3a9c4c61517","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"c7c8be4d465852308bc347264289339f515e2a73362e09887485d3a9c4c61517","first_computed_at":"2026-07-05T08:00:45.029177Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-07-05T08:00:45.029177Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"0XOGGcf4kbr4R5kTfLsKDxC8xQMAUR2AXffWdHFEsG2vQYYAF5BaYuyQLzEWVGnSfUfjO0RNFOulXjKcPVtzAg==","signature_status":"signed_v1","signed_at":"2026-07-05T08:00:45.029824Z","signed_message":"canonical_sha256_bytes"},"source_id":"2403.17091","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:64bbcddc49fc044d56e36bd7bdd561f2e4d0f9a88b055be68e2b935b98e0c202","sha256:6912a441bb472d3a08ba5579d5195cd0680b532a25dbed325afb5ae70ef37808"],"state_sha256":"7d608a32de211ab5fa266ec189ecedbda41f86ca3614ac767cf09b2eb005a7f5"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"Kbb2zdd+KuB0Fx7yA+l0cBnvo8yXHwwXjmS0jRZOVOV8rhYuLob4lYS9lQBVRrGfqUPv9qphyonUAj3j4NxGCg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-07-05T14:31:35.344717Z","bundle_sha256":"e297b97c71edfe2af61af62c098a67387e463aca9718cee20825e0d084061747"}}