{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:2PJ7YCWOO54HK7A6LEQK73LDF5","short_pith_number":"pith:2PJ7YCWO","canonical_record":{"source":{"id":"2606.28433","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-06-26T03:19:27Z","cross_cats_sorted":[],"title_canon_sha256":"dd12da443feb58a63ff6c8d16091f114171b063dbcd1d8e6a65ffac882c257b9","abstract_canon_sha256":"ccf9f75716e1d968f92be73ca708c0566a17b295e11ac136321eeab3c099250e"},"schema_version":"1.0"},"canonical_sha256":"d3d3fc0ace7778757c1e5920afed632f5750400a6bf6e0a09b99465053f91bd0","source":{"kind":"arxiv","id":"2606.28433","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2606.28433","created_at":"2026-06-30T00:15:13Z"},{"alias_kind":"arxiv_version","alias_value":"2606.28433v1","created_at":"2026-06-30T00:15:13Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.28433","created_at":"2026-06-30T00:15:13Z"},{"alias_kind":"pith_short_12","alias_value":"2PJ7YCWOO54H","created_at":"2026-06-30T00:15:13Z"},{"alias_kind":"pith_short_16","alias_value":"2PJ7YCWOO54HK7A6","created_at":"2026-06-30T00:15:13Z"},{"alias_kind":"pith_short_8","alias_value":"2PJ7YCWO","created_at":"2026-06-30T00:15:13Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:2PJ7YCWOO54HK7A6LEQK73LDF5","target":"record","payload":{"canonical_record":{"source":{"id":"2606.28433","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-06-26T03:19:27Z","cross_cats_sorted":[],"title_canon_sha256":"dd12da443feb58a63ff6c8d16091f114171b063dbcd1d8e6a65ffac882c257b9","abstract_canon_sha256":"ccf9f75716e1d968f92be73ca708c0566a17b295e11ac136321eeab3c099250e"},"schema_version":"1.0"},"canonical_sha256":"d3d3fc0ace7778757c1e5920afed632f5750400a6bf6e0a09b99465053f91bd0","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-30T00:15:13.913626Z","signature_b64":"pFt7IvjcmR7PF+SVk8kSqCAKNulzmtaf2oMvbhVmjllkeVAOmYPBOFBrjdDrB1EUIb94nXzjIQN6lxFEHu9gAQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"d3d3fc0ace7778757c1e5920afed632f5750400a6bf6e0a09b99465053f91bd0","last_reissued_at":"2026-06-30T00:15:13.913233Z","signature_status":"signed_v1","first_computed_at":"2026-06-30T00:15:13.913233Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2606.28433","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-30T00:15:13Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"1TC/UNNDPLR/jJqHdxlurMWKxHqvhbr9a+0IUWmeuVTxWCMOxdlpkECbykonwjtai3EKuoC5DqWJ/Hu2mY6nCA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-07-05T05:42:24.986471Z"},"content_sha256":"e7a5ba62b03393356d12eb0d2dc479cb2a57aaac34c37b0d49ac12693f61bbb3","schema_version":"1.0","event_id":"sha256:e7a5ba62b03393356d12eb0d2dc479cb2a57aaac34c37b0d49ac12693f61bbb3"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:2PJ7YCWOO54HK7A6LEQK73LDF5","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Position: RL Researchers Need to Distinguish Between Solving Simulators and Using Simulators as a Proxy","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.LG","authors_text":"Esraa Elelimy, Martha White, Matthew Vandergrift","submitted_at":"2026-06-26T03:19:27Z","abstract_excerpt":"One goal in reinforcement learning (RL) research is to understand general-purpose sequential decision-making, using benchmark simulators as a proxy for learning in deployment settings. When running experiments, however, the goal of achieving high performance in the simulator can mutate into focusing exclusively on solving the simulator. To achieve high scores, researchers may adopt solutions exclusively meant for solving simulators, rather than learning while the agent is deployed outside a simulator. Solving simulators is also worthy of investigation, but it is a fundamentally different RL re"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.28433","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2606.28433/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-30T00:15:13Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"LEPjYKeX+Q3RxCWouSZbAe/Z6J5a07kfeJG7MwTsD4qhufRWbciHsswyTxJY/nVn0g2b8uf8wEeF4BBv/UHsCQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-07-05T05:42:24.986837Z"},"content_sha256":"e70f793783508639c06c6bf521348621976876abc428084a2344cdf07007b775","schema_version":"1.0","event_id":"sha256:e70f793783508639c06c6bf521348621976876abc428084a2344cdf07007b775"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/2PJ7YCWOO54HK7A6LEQK73LDF5/bundle.json","state_url":"https://pith.science/pith/2PJ7YCWOO54HK7A6LEQK73LDF5/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/2PJ7YCWOO54HK7A6LEQK73LDF5/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-07-05T05:42:24Z","links":{"resolver":"https://pith.science/pith/2PJ7YCWOO54HK7A6LEQK73LDF5","bundle":"https://pith.science/pith/2PJ7YCWOO54HK7A6LEQK73LDF5/bundle.json","state":"https://pith.science/pith/2PJ7YCWOO54HK7A6LEQK73LDF5/state.json","well_known_bundle":"https://pith.science/.well-known/pith/2PJ7YCWOO54HK7A6LEQK73LDF5/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:2PJ7YCWOO54HK7A6LEQK73LDF5","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"ccf9f75716e1d968f92be73ca708c0566a17b295e11ac136321eeab3c099250e","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-06-26T03:19:27Z","title_canon_sha256":"dd12da443feb58a63ff6c8d16091f114171b063dbcd1d8e6a65ffac882c257b9"},"schema_version":"1.0","source":{"id":"2606.28433","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2606.28433","created_at":"2026-06-30T00:15:13Z"},{"alias_kind":"arxiv_version","alias_value":"2606.28433v1","created_at":"2026-06-30T00:15:13Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.28433","created_at":"2026-06-30T00:15:13Z"},{"alias_kind":"pith_short_12","alias_value":"2PJ7YCWOO54H","created_at":"2026-06-30T00:15:13Z"},{"alias_kind":"pith_short_16","alias_value":"2PJ7YCWOO54HK7A6","created_at":"2026-06-30T00:15:13Z"},{"alias_kind":"pith_short_8","alias_value":"2PJ7YCWO","created_at":"2026-06-30T00:15:13Z"}],"graph_snapshots":[{"event_id":"sha256:e70f793783508639c06c6bf521348621976876abc428084a2344cdf07007b775","target":"graph","created_at":"2026-06-30T00:15:13Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2606.28433/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"One goal in reinforcement learning (RL) research is to understand general-purpose sequential decision-making, using benchmark simulators as a proxy for learning in deployment settings. When running experiments, however, the goal of achieving high performance in the simulator can mutate into focusing exclusively on solving the simulator. To achieve high scores, researchers may adopt solutions exclusively meant for solving simulators, rather than learning while the agent is deployed outside a simulator. Solving simulators is also worthy of investigation, but it is a fundamentally different RL re","authors_text":"Esraa Elelimy, Martha White, Matthew Vandergrift","cross_cats":[],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-06-26T03:19:27Z","title":"Position: RL Researchers Need to Distinguish Between Solving Simulators and Using Simulators as a Proxy"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.28433","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:e7a5ba62b03393356d12eb0d2dc479cb2a57aaac34c37b0d49ac12693f61bbb3","target":"record","created_at":"2026-06-30T00:15:13Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"ccf9f75716e1d968f92be73ca708c0566a17b295e11ac136321eeab3c099250e","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-06-26T03:19:27Z","title_canon_sha256":"dd12da443feb58a63ff6c8d16091f114171b063dbcd1d8e6a65ffac882c257b9"},"schema_version":"1.0","source":{"id":"2606.28433","kind":"arxiv","version":1}},"canonical_sha256":"d3d3fc0ace7778757c1e5920afed632f5750400a6bf6e0a09b99465053f91bd0","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"d3d3fc0ace7778757c1e5920afed632f5750400a6bf6e0a09b99465053f91bd0","first_computed_at":"2026-06-30T00:15:13.913233Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-06-30T00:15:13.913233Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"pFt7IvjcmR7PF+SVk8kSqCAKNulzmtaf2oMvbhVmjllkeVAOmYPBOFBrjdDrB1EUIb94nXzjIQN6lxFEHu9gAQ==","signature_status":"signed_v1","signed_at":"2026-06-30T00:15:13.913626Z","signed_message":"canonical_sha256_bytes"},"source_id":"2606.28433","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:e7a5ba62b03393356d12eb0d2dc479cb2a57aaac34c37b0d49ac12693f61bbb3","sha256:e70f793783508639c06c6bf521348621976876abc428084a2344cdf07007b775"],"state_sha256":"f3b12c2dff3e09e54be026f5178fb11758bcf715c6cb5c48f374cccaef83ba89"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"IdSDp30cjagdrRv6bY273HEBljMsI3veCxYLf4LPyRL/Lp2roFwSF46PAKQ2o2Q07F5dbw7ngK+kSVW4evRuBQ==","signed_message":"bundle_sha256_bytes","signed_at":"2026-07-05T05:42:24.988777Z","bundle_sha256":"286e89cce6274332f1ad3c8101a310cebfa6f221f4a201fedb1bcd86f24e96c6"}}