{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:BVHCVIQKSWIIIS33GKSE2GGH37","short_pith_number":"pith:BVHCVIQK","canonical_record":{"source":{"id":"2605.29032","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2026-05-27T19:31:37Z","cross_cats_sorted":["stat.ML"],"title_canon_sha256":"dee1828ac55b776c4d4fe72959f813254890e14e08615d880c3614374e37b33a","abstract_canon_sha256":"1c56b0c790c1ed0c762c717dbcfbdc938079afe505e3f5170ff88f297d56c436"},"schema_version":"1.0"},"canonical_sha256":"0d4e2aa20a9590844b7b32a44d18c7dfc8333499a8fa01a72d7583e38345fcc0","source":{"kind":"arxiv","id":"2605.29032","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.29032","created_at":"2026-05-29T01:04:43Z"},{"alias_kind":"arxiv_version","alias_value":"2605.29032v1","created_at":"2026-05-29T01:04:43Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.29032","created_at":"2026-05-29T01:04:43Z"},{"alias_kind":"pith_short_12","alias_value":"BVHCVIQKSWII","created_at":"2026-05-29T01:04:43Z"},{"alias_kind":"pith_short_16","alias_value":"BVHCVIQKSWIIIS33","created_at":"2026-05-29T01:04:43Z"},{"alias_kind":"pith_short_8","alias_value":"BVHCVIQK","created_at":"2026-05-29T01:04:43Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:BVHCVIQKSWIIIS33GKSE2GGH37","target":"record","payload":{"canonical_record":{"source":{"id":"2605.29032","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2026-05-27T19:31:37Z","cross_cats_sorted":["stat.ML"],"title_canon_sha256":"dee1828ac55b776c4d4fe72959f813254890e14e08615d880c3614374e37b33a","abstract_canon_sha256":"1c56b0c790c1ed0c762c717dbcfbdc938079afe505e3f5170ff88f297d56c436"},"schema_version":"1.0"},"canonical_sha256":"0d4e2aa20a9590844b7b32a44d18c7dfc8333499a8fa01a72d7583e38345fcc0","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-29T01:04:43.973393Z","signature_b64":"LPFIbPnn08bi5QzidAaDHFR+zMtOzZ1BJixG/cGL7cGE0GiqoXCHX+MH7f4X/OPQrXzaB/FWBjOaDOzUXJJeDg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"0d4e2aa20a9590844b7b32a44d18c7dfc8333499a8fa01a72d7583e38345fcc0","last_reissued_at":"2026-05-29T01:04:43.972889Z","signature_status":"signed_v1","first_computed_at":"2026-05-29T01:04:43.972889Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2605.29032","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-29T01:04:43Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"Vt+iXvj/KCyvyFnu0icijBRNXmsVDCsQ0NQlJLIcqjPbkh6nc3dgydMDTTJCXXVafQZFWWqqmCQmg+pYJUC+Aw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-04T20:16:29.593040Z"},"content_sha256":"65290011cc4fd60f709601dd1c8d405bba4c8eace0a69d1164166b190e0ff54d","schema_version":"1.0","event_id":"sha256:65290011cc4fd60f709601dd1c8d405bba4c8eace0a69d1164166b190e0ff54d"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:BVHCVIQKSWIIIS33GKSE2GGH37","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Theoretical Foundations and Effective Algorithms for Policy-Aware Simulator Learning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["stat.ML"],"primary_cat":"cs.LG","authors_text":"Christoph Dann, Mehryar Mohri, Yishay Mansour","submitted_at":"2026-05-27T19:31:37Z","abstract_excerpt":"Model-based reinforcement learning (MBRL) agents typically learn world models by minimizing predictive loss. However, powerful RL optimizers inevitably exploit minor model inaccuracies, leading to simulator exploitation and a reality gap where policies succeed in simulation but fail in the real world. We propose that the objective for learning simulators should be strategic robustness rather than predictive accuracy, and formulate this as a zero-sum minimax game between a model player and an adversarial policy player. We provide a comprehensive theoretical analysis: (1) an online learning guar"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.29032","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2605.29032/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-29T01:04:43Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"XbQtHF/mAuTU9PA63bmmJdBlOjh19HLEvHq+ZSBTUi3p7nJWm8mHsEkAwSvDdWfjMu7yUShOiMSLmcyTPl/EAQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-04T20:16:29.593731Z"},"content_sha256":"dafd3a78b9ec14aaf6cce186786d5f13306a64b4f5a3045bd1de0e2be5167ebe","schema_version":"1.0","event_id":"sha256:dafd3a78b9ec14aaf6cce186786d5f13306a64b4f5a3045bd1de0e2be5167ebe"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/BVHCVIQKSWIIIS33GKSE2GGH37/bundle.json","state_url":"https://pith.science/pith/BVHCVIQKSWIIIS33GKSE2GGH37/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/BVHCVIQKSWIIIS33GKSE2GGH37/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-04T20:16:29Z","links":{"resolver":"https://pith.science/pith/BVHCVIQKSWIIIS33GKSE2GGH37","bundle":"https://pith.science/pith/BVHCVIQKSWIIIS33GKSE2GGH37/bundle.json","state":"https://pith.science/pith/BVHCVIQKSWIIIS33GKSE2GGH37/state.json","well_known_bundle":"https://pith.science/.well-known/pith/BVHCVIQKSWIIIS33GKSE2GGH37/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:BVHCVIQKSWIIIS33GKSE2GGH37","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"1c56b0c790c1ed0c762c717dbcfbdc938079afe505e3f5170ff88f297d56c436","cross_cats_sorted":["stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2026-05-27T19:31:37Z","title_canon_sha256":"dee1828ac55b776c4d4fe72959f813254890e14e08615d880c3614374e37b33a"},"schema_version":"1.0","source":{"id":"2605.29032","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.29032","created_at":"2026-05-29T01:04:43Z"},{"alias_kind":"arxiv_version","alias_value":"2605.29032v1","created_at":"2026-05-29T01:04:43Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.29032","created_at":"2026-05-29T01:04:43Z"},{"alias_kind":"pith_short_12","alias_value":"BVHCVIQKSWII","created_at":"2026-05-29T01:04:43Z"},{"alias_kind":"pith_short_16","alias_value":"BVHCVIQKSWIIIS33","created_at":"2026-05-29T01:04:43Z"},{"alias_kind":"pith_short_8","alias_value":"BVHCVIQK","created_at":"2026-05-29T01:04:43Z"}],"graph_snapshots":[{"event_id":"sha256:dafd3a78b9ec14aaf6cce186786d5f13306a64b4f5a3045bd1de0e2be5167ebe","target":"graph","created_at":"2026-05-29T01:04:43Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2605.29032/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Model-based reinforcement learning (MBRL) agents typically learn world models by minimizing predictive loss. However, powerful RL optimizers inevitably exploit minor model inaccuracies, leading to simulator exploitation and a reality gap where policies succeed in simulation but fail in the real world. We propose that the objective for learning simulators should be strategic robustness rather than predictive accuracy, and formulate this as a zero-sum minimax game between a model player and an adversarial policy player. We provide a comprehensive theoretical analysis: (1) an online learning guar","authors_text":"Christoph Dann, Mehryar Mohri, Yishay Mansour","cross_cats":["stat.ML"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2026-05-27T19:31:37Z","title":"Theoretical Foundations and Effective Algorithms for Policy-Aware Simulator Learning"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.29032","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:65290011cc4fd60f709601dd1c8d405bba4c8eace0a69d1164166b190e0ff54d","target":"record","created_at":"2026-05-29T01:04:43Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"1c56b0c790c1ed0c762c717dbcfbdc938079afe505e3f5170ff88f297d56c436","cross_cats_sorted":["stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2026-05-27T19:31:37Z","title_canon_sha256":"dee1828ac55b776c4d4fe72959f813254890e14e08615d880c3614374e37b33a"},"schema_version":"1.0","source":{"id":"2605.29032","kind":"arxiv","version":1}},"canonical_sha256":"0d4e2aa20a9590844b7b32a44d18c7dfc8333499a8fa01a72d7583e38345fcc0","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"0d4e2aa20a9590844b7b32a44d18c7dfc8333499a8fa01a72d7583e38345fcc0","first_computed_at":"2026-05-29T01:04:43.972889Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-29T01:04:43.972889Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"LPFIbPnn08bi5QzidAaDHFR+zMtOzZ1BJixG/cGL7cGE0GiqoXCHX+MH7f4X/OPQrXzaB/FWBjOaDOzUXJJeDg==","signature_status":"signed_v1","signed_at":"2026-05-29T01:04:43.973393Z","signed_message":"canonical_sha256_bytes"},"source_id":"2605.29032","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:65290011cc4fd60f709601dd1c8d405bba4c8eace0a69d1164166b190e0ff54d","sha256:dafd3a78b9ec14aaf6cce186786d5f13306a64b4f5a3045bd1de0e2be5167ebe"],"state_sha256":"5e4b72d58902371861e20cc5e6bb9f4f27d3b8f4919423b3f8920915f3445dc4"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"RL9Fu6IiR8PwSo8X676y9MYNEQXZj7gOAyLVLcOAtuEwfXa2SJUNXbG0CLYh227AV2zFt6nDDqePQNTeSYZsBQ==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-04T20:16:29.596701Z","bundle_sha256":"78400e81dd209674f11759ae10719393300bd0cde715b5c9ccbd8cb8ed2d7592"}}