{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:GFENIXXPBRXCNB4QFKRJ2BLP7Y","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"51a8c63dfcb3a315208ed65dbcfad6cd3edd265f9dae3b2bf3d06e6b994b9c86","cross_cats_sorted":["cs.AI"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-05-29T01:24:24Z","title_canon_sha256":"9b55533dea4a4926a21d5228939505d9bd9b8e1254370d6bb3627af04ab6f34f"},"schema_version":"1.0","source":{"id":"2605.30719","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.30719","created_at":"2026-06-01T01:03:12Z"},{"alias_kind":"arxiv_version","alias_value":"2605.30719v1","created_at":"2026-06-01T01:03:12Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.30719","created_at":"2026-06-01T01:03:12Z"},{"alias_kind":"pith_short_12","alias_value":"GFENIXXPBRXC","created_at":"2026-06-01T01:03:12Z"},{"alias_kind":"pith_short_16","alias_value":"GFENIXXPBRXCNB4Q","created_at":"2026-06-01T01:03:12Z"},{"alias_kind":"pith_short_8","alias_value":"GFENIXXP","created_at":"2026-06-01T01:03:12Z"}],"graph_snapshots":[{"event_id":"sha256:c8f0d8d36d2ca6734aaece5cd956ef096725b018a585e2fc34838606ec27740f","target":"graph","created_at":"2026-06-01T01:03:12Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2605.30719/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"We study when large language models (LLMs) can serve as effective black-box policy optimizers for reinforcement learning (RL) tasks, i.e., when can we replace classical RL algorithms with an LLM? We explore this question by introducing Prompted Policy Optimization (PromptPO), an iterative method that prompts an LLM with Python descriptions of the state space, action space, and reward function, then has it generate and refine executable policies based on rollout feedback. Across hard exploration environments, Meta-World robotics tasks, and several real-world control problems, PromptPO often mat","authors_text":"Emma Brunskill, Stephane Hatgis-Kessell","cross_cats":["cs.AI"],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-05-29T01:24:24Z","title":"When are LLMs Sufficient Policy Optimizers for Sequential RL Tasks?"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.30719","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:06d4b32323c6c92747094e9309c27918d5eebf4e9637256cd6a0347990c9ad6d","target":"record","created_at":"2026-06-01T01:03:12Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"51a8c63dfcb3a315208ed65dbcfad6cd3edd265f9dae3b2bf3d06e6b994b9c86","cross_cats_sorted":["cs.AI"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-05-29T01:24:24Z","title_canon_sha256":"9b55533dea4a4926a21d5228939505d9bd9b8e1254370d6bb3627af04ab6f34f"},"schema_version":"1.0","source":{"id":"2605.30719","kind":"arxiv","version":1}},"canonical_sha256":"3148d45eef0c6e2687902aa29d056ffe0604ece6ef537a24fc35e682fcbb9da1","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"3148d45eef0c6e2687902aa29d056ffe0604ece6ef537a24fc35e682fcbb9da1","first_computed_at":"2026-06-01T01:03:12.140135Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-06-01T01:03:12.140135Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"6V5x1L0wA9qlSSwOePGhmJHbVJn76Rzv6+LffRx78bHZ/5Gf/mIgJbbSG930eq1ymgV3fhhfKD054DFStWtODA==","signature_status":"signed_v1","signed_at":"2026-06-01T01:03:12.140927Z","signed_message":"canonical_sha256_bytes"},"source_id":"2605.30719","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:06d4b32323c6c92747094e9309c27918d5eebf4e9637256cd6a0347990c9ad6d","sha256:c8f0d8d36d2ca6734aaece5cd956ef096725b018a585e2fc34838606ec27740f"],"state_sha256":"3027c3828374d06ae1910993cc621fb59444c5c7cf54f0fdb5f79439dae3c112"}