{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2023:AP2ERP4WROMF6ADSYSWAL5U4KF","short_pith_number":"pith:AP2ERP4W","canonical_record":{"source":{"id":"2309.03409","kind":"arxiv","version":3},"metadata":{"license":"http://creativecommons.org/publicdomain/zero/1.0/","primary_cat":"cs.LG","submitted_at":"2023-09-07T00:07:15Z","cross_cats_sorted":["cs.AI","cs.CL"],"title_canon_sha256":"c7267a9ba911c0155389c05e1a869f5061975eba6a5996cee9757c2e248dba5a","abstract_canon_sha256":"696a3ad040aa7c54bce5841a41ecbdc0ec1f364eb2796cfc81219edc4c0c3f43"},"schema_version":"1.0"},"canonical_sha256":"03f448bf968b985f0072c4ac05f69c515c7535edb675e2102f91ffe4f89aa05c","source":{"kind":"arxiv","id":"2309.03409","version":3},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2309.03409","created_at":"2026-05-17T23:39:19Z"},{"alias_kind":"arxiv_version","alias_value":"2309.03409v3","created_at":"2026-05-17T23:39:19Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2309.03409","created_at":"2026-05-17T23:39:19Z"},{"alias_kind":"pith_short_12","alias_value":"AP2ERP4WROMF","created_at":"2026-05-18T12:33:33Z"},{"alias_kind":"pith_short_16","alias_value":"AP2ERP4WROMF6ADS","created_at":"2026-05-18T12:33:33Z"},{"alias_kind":"pith_short_8","alias_value":"AP2ERP4W","created_at":"2026-05-18T12:33:33Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2023:AP2ERP4WROMF6ADSYSWAL5U4KF","target":"record","payload":{"canonical_record":{"source":{"id":"2309.03409","kind":"arxiv","version":3},"metadata":{"license":"http://creativecommons.org/publicdomain/zero/1.0/","primary_cat":"cs.LG","submitted_at":"2023-09-07T00:07:15Z","cross_cats_sorted":["cs.AI","cs.CL"],"title_canon_sha256":"c7267a9ba911c0155389c05e1a869f5061975eba6a5996cee9757c2e248dba5a","abstract_canon_sha256":"696a3ad040aa7c54bce5841a41ecbdc0ec1f364eb2796cfc81219edc4c0c3f43"},"schema_version":"1.0"},"canonical_sha256":"03f448bf968b985f0072c4ac05f69c515c7535edb675e2102f91ffe4f89aa05c","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:39:19.769749Z","signature_b64":"namU8Wke+5UMOCjdCmsifQJq9+/V64eGwK64H3XRZByHkEzNP45HeE6RhsCD5ghwOaMSQkkr/wfryIQu/nLmBw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"03f448bf968b985f0072c4ac05f69c515c7535edb675e2102f91ffe4f89aa05c","last_reissued_at":"2026-05-17T23:39:19.769046Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:39:19.769046Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2309.03409","source_version":3,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:39:19Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"qkNgipUn7Iq4uRd7+5LNfAiGa/FSfUOSWBeH13tuBaVFWwrf+StJNDkD8mr3ITbbjGmYgTO1hIMQ/w/lpUVDCQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-19T22:57:48.915252Z"},"content_sha256":"918c57b6914fe05ea8404e6e8416148d4eaad5ca4917df7f835e4d9a8dcb3159","schema_version":"1.0","event_id":"sha256:918c57b6914fe05ea8404e6e8416148d4eaad5ca4917df7f835e4d9a8dcb3159"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2023:AP2ERP4WROMF6ADSYSWAL5U4KF","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Large Language Models as Optimizers","license":"http://creativecommons.org/publicdomain/zero/1.0/","headline":"Large language models can optimize solutions by iteratively generating new candidates from a prompt that lists all prior attempts together with their scores.","cross_cats":["cs.AI","cs.CL"],"primary_cat":"cs.LG","authors_text":"Chengrun Yang, Denny Zhou, Hanxiao Liu, Quoc V. Le, Xinyun Chen, Xuezhi Wang, Yifeng Lu","submitted_at":"2023-09-07T00:07:15Z","abstract_excerpt":"Optimization is ubiquitous. While derivative-based algorithms have been powerful tools for various problems, the absence of gradient imposes challenges on many real-world applications. In this work, we propose Optimization by PROmpting (OPRO), a simple and effective approach to leverage large language models (LLMs) as optimizers, where the optimization task is described in natural language. In each optimization step, the LLM generates new solutions from the prompt that contains previously generated solutions with their values, then the new solutions are evaluated and added to the prompt for th"},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"With a variety of LLMs, we demonstrate that the best prompts optimized by OPRO outperform human-designed prompts by up to 8% on GSM8K, and by up to 50% on Big-Bench Hard tasks.","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"That an LLM, when shown a growing list of prior solutions and their numeric scores inside a prompt, will reliably generate new solutions that improve on the best previous score rather than plateau or regress.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"Large language models can optimize by being prompted with histories of past solutions and scores to propose better ones, producing prompts that raise accuracy up to 8% on GSM8K and 50% on Big-Bench Hard over human-designed baselines.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"Large language models can optimize solutions by iteratively generating new candidates from a prompt that lists all prior attempts together with their scores.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"03a126071ea71762f75dabe3304d110b6f4b17580b8f1f54f2d53aaa5eb20bc9"},"source":{"id":"2309.03409","kind":"arxiv","version":3},"verdict":{"id":"26c1b0a2-2213-4933-8b78-581198dc790b","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-14T23:59:25.433751Z","strongest_claim":"With a variety of LLMs, we demonstrate that the best prompts optimized by OPRO outperform human-designed prompts by up to 8% on GSM8K, and by up to 50% on Big-Bench Hard tasks.","one_line_summary":"Large language models can optimize by being prompted with histories of past solutions and scores to propose better ones, producing prompts that raise accuracy up to 8% on GSM8K and 50% on Big-Bench Hard over human-designed baselines.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"That an LLM, when shown a growing list of prior solutions and their numeric scores inside a prompt, will reliably generate new solutions that improve on the best previous score rather than plateau or regress.","pith_extraction_headline":"Large language models can optimize solutions by iteratively generating new candidates from a prompt that lists all prior attempts together with their scores."},"references":{"count":51,"sample":[{"doi":"","year":null,"title":"PaLM 2 Technical Report","work_id":"905ee9a7-ea61-4a94-bd62-2600cbe3e315","ref_index":1,"cited_arxiv_id":"2305.10403","is_internal_anchor":true},{"doi":"","year":null,"title":"Constitutional AI: Harmlessness from AI Feedback","work_id":"faaaa4e0-2676-4fac-a0b4-99aef10d2095","ref_index":2,"cited_arxiv_id":"2212.08073","is_internal_anchor":true},{"doi":"","year":null,"title":"arXiv preprint arXiv:2305.17126 , year=","work_id":"1447b78e-0a79-4af6-8cd4-93220e680d2b","ref_index":3,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":null,"title":"Dohan and David R","work_id":"80c6bf1e-aa52-4830-9f36-0616ee2d8ef8","ref_index":4,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":null,"title":"Teaching Large Language Models to Self-Debug","work_id":"cdfb2680-220c-44eb-9edd-867b75fb821d","ref_index":5,"cited_arxiv_id":"2304.05128","is_internal_anchor":true}],"resolved_work":51,"snapshot_sha256":"1167c17795a898642d17396677c24d26a47a2e13074cb32b68985a20f22a7215","internal_anchors":22},"formal_canon":{"evidence_count":2,"snapshot_sha256":"a9e2c1afcd7f0509ee97c2ab69a703856fc472be4b417d398ee73c015140427c"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":"26c1b0a2-2213-4933-8b78-581198dc790b"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:39:19Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"fNq+p47KNWIoWv7MZWqqE26b7XX38uqoW30V3fyvP4c88z39QnhjlpqsP3/58wMMZtXYYqzw12ZcBzcrfeFJAg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-19T22:57:48.915807Z"},"content_sha256":"f3e439bc1c08c059ad78d9c2e17b56fc32a93f91b79ea46b06c88cda53dd9599","schema_version":"1.0","event_id":"sha256:f3e439bc1c08c059ad78d9c2e17b56fc32a93f91b79ea46b06c88cda53dd9599"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/AP2ERP4WROMF6ADSYSWAL5U4KF/bundle.json","state_url":"https://pith.science/pith/AP2ERP4WROMF6ADSYSWAL5U4KF/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/AP2ERP4WROMF6ADSYSWAL5U4KF/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-19T22:57:48Z","links":{"resolver":"https://pith.science/pith/AP2ERP4WROMF6ADSYSWAL5U4KF","bundle":"https://pith.science/pith/AP2ERP4WROMF6ADSYSWAL5U4KF/bundle.json","state":"https://pith.science/pith/AP2ERP4WROMF6ADSYSWAL5U4KF/state.json","well_known_bundle":"https://pith.science/.well-known/pith/AP2ERP4WROMF6ADSYSWAL5U4KF/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2023:AP2ERP4WROMF6ADSYSWAL5U4KF","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"696a3ad040aa7c54bce5841a41ecbdc0ec1f364eb2796cfc81219edc4c0c3f43","cross_cats_sorted":["cs.AI","cs.CL"],"license":"http://creativecommons.org/publicdomain/zero/1.0/","primary_cat":"cs.LG","submitted_at":"2023-09-07T00:07:15Z","title_canon_sha256":"c7267a9ba911c0155389c05e1a869f5061975eba6a5996cee9757c2e248dba5a"},"schema_version":"1.0","source":{"id":"2309.03409","kind":"arxiv","version":3}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2309.03409","created_at":"2026-05-17T23:39:19Z"},{"alias_kind":"arxiv_version","alias_value":"2309.03409v3","created_at":"2026-05-17T23:39:19Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2309.03409","created_at":"2026-05-17T23:39:19Z"},{"alias_kind":"pith_short_12","alias_value":"AP2ERP4WROMF","created_at":"2026-05-18T12:33:33Z"},{"alias_kind":"pith_short_16","alias_value":"AP2ERP4WROMF6ADS","created_at":"2026-05-18T12:33:33Z"},{"alias_kind":"pith_short_8","alias_value":"AP2ERP4W","created_at":"2026-05-18T12:33:33Z"}],"graph_snapshots":[{"event_id":"sha256:f3e439bc1c08c059ad78d9c2e17b56fc32a93f91b79ea46b06c88cda53dd9599","target":"graph","created_at":"2026-05-17T23:39:19Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"With a variety of LLMs, we demonstrate that the best prompts optimized by OPRO outperform human-designed prompts by up to 8% on GSM8K, and by up to 50% on Big-Bench Hard tasks."},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"That an LLM, when shown a growing list of prior solutions and their numeric scores inside a prompt, will reliably generate new solutions that improve on the best previous score rather than plateau or regress."},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"Large language models can optimize by being prompted with histories of past solutions and scores to propose better ones, producing prompts that raise accuracy up to 8% on GSM8K and 50% on Big-Bench Hard over human-designed baselines."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"Large language models can optimize solutions by iteratively generating new candidates from a prompt that lists all prior attempts together with their scores."}],"snapshot_sha256":"03a126071ea71762f75dabe3304d110b6f4b17580b8f1f54f2d53aaa5eb20bc9"},"formal_canon":{"evidence_count":2,"snapshot_sha256":"a9e2c1afcd7f0509ee97c2ab69a703856fc472be4b417d398ee73c015140427c"},"paper":{"abstract_excerpt":"Optimization is ubiquitous. While derivative-based algorithms have been powerful tools for various problems, the absence of gradient imposes challenges on many real-world applications. In this work, we propose Optimization by PROmpting (OPRO), a simple and effective approach to leverage large language models (LLMs) as optimizers, where the optimization task is described in natural language. In each optimization step, the LLM generates new solutions from the prompt that contains previously generated solutions with their values, then the new solutions are evaluated and added to the prompt for th","authors_text":"Chengrun Yang, Denny Zhou, Hanxiao Liu, Quoc V. Le, Xinyun Chen, Xuezhi Wang, Yifeng Lu","cross_cats":["cs.AI","cs.CL"],"headline":"Large language models can optimize solutions by iteratively generating new candidates from a prompt that lists all prior attempts together with their scores.","license":"http://creativecommons.org/publicdomain/zero/1.0/","primary_cat":"cs.LG","submitted_at":"2023-09-07T00:07:15Z","title":"Large Language Models as Optimizers"},"references":{"count":51,"internal_anchors":22,"resolved_work":51,"sample":[{"cited_arxiv_id":"2305.10403","doi":"","is_internal_anchor":true,"ref_index":1,"title":"PaLM 2 Technical Report","work_id":"905ee9a7-ea61-4a94-bd62-2600cbe3e315","year":null},{"cited_arxiv_id":"2212.08073","doi":"","is_internal_anchor":true,"ref_index":2,"title":"Constitutional AI: Harmlessness from AI Feedback","work_id":"faaaa4e0-2676-4fac-a0b4-99aef10d2095","year":null},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":3,"title":"arXiv preprint arXiv:2305.17126 , year=","work_id":"1447b78e-0a79-4af6-8cd4-93220e680d2b","year":null},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":4,"title":"Dohan and David R","work_id":"80c6bf1e-aa52-4830-9f36-0616ee2d8ef8","year":null},{"cited_arxiv_id":"2304.05128","doi":"","is_internal_anchor":true,"ref_index":5,"title":"Teaching Large Language Models to Self-Debug","work_id":"cdfb2680-220c-44eb-9edd-867b75fb821d","year":null}],"snapshot_sha256":"1167c17795a898642d17396677c24d26a47a2e13074cb32b68985a20f22a7215"},"source":{"id":"2309.03409","kind":"arxiv","version":3},"verdict":{"created_at":"2026-05-14T23:59:25.433751Z","id":"26c1b0a2-2213-4933-8b78-581198dc790b","model_set":{"reader":"grok-4.3"},"one_line_summary":"Large language models can optimize by being prompted with histories of past solutions and scores to propose better ones, producing prompts that raise accuracy up to 8% on GSM8K and 50% on Big-Bench Hard over human-designed baselines.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"Large language models can optimize solutions by iteratively generating new candidates from a prompt that lists all prior attempts together with their scores.","strongest_claim":"With a variety of LLMs, we demonstrate that the best prompts optimized by OPRO outperform human-designed prompts by up to 8% on GSM8K, and by up to 50% on Big-Bench Hard tasks.","weakest_assumption":"That an LLM, when shown a growing list of prior solutions and their numeric scores inside a prompt, will reliably generate new solutions that improve on the best previous score rather than plateau or regress."}},"verdict_id":"26c1b0a2-2213-4933-8b78-581198dc790b"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:918c57b6914fe05ea8404e6e8416148d4eaad5ca4917df7f835e4d9a8dcb3159","target":"record","created_at":"2026-05-17T23:39:19Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"696a3ad040aa7c54bce5841a41ecbdc0ec1f364eb2796cfc81219edc4c0c3f43","cross_cats_sorted":["cs.AI","cs.CL"],"license":"http://creativecommons.org/publicdomain/zero/1.0/","primary_cat":"cs.LG","submitted_at":"2023-09-07T00:07:15Z","title_canon_sha256":"c7267a9ba911c0155389c05e1a869f5061975eba6a5996cee9757c2e248dba5a"},"schema_version":"1.0","source":{"id":"2309.03409","kind":"arxiv","version":3}},"canonical_sha256":"03f448bf968b985f0072c4ac05f69c515c7535edb675e2102f91ffe4f89aa05c","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"03f448bf968b985f0072c4ac05f69c515c7535edb675e2102f91ffe4f89aa05c","first_computed_at":"2026-05-17T23:39:19.769046Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:39:19.769046Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"namU8Wke+5UMOCjdCmsifQJq9+/V64eGwK64H3XRZByHkEzNP45HeE6RhsCD5ghwOaMSQkkr/wfryIQu/nLmBw==","signature_status":"signed_v1","signed_at":"2026-05-17T23:39:19.769749Z","signed_message":"canonical_sha256_bytes"},"source_id":"2309.03409","source_kind":"arxiv","source_version":3}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:918c57b6914fe05ea8404e6e8416148d4eaad5ca4917df7f835e4d9a8dcb3159","sha256:f3e439bc1c08c059ad78d9c2e17b56fc32a93f91b79ea46b06c88cda53dd9599"],"state_sha256":"1a329aec8eef9e9b17c6d4c094e8cb442fe8bb16b43ffe157871dc0f85fb2dbd"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"MbmueIi3eMyBr5WjEQll53OqAfWZU7GLGGJ/itzyc8BwsrQWoOEutpIJrEeC+gCeKLDcC+1tdoae0jegSP5RDQ==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-19T22:57:48.918290Z","bundle_sha256":"c4f5ece39d7ecfdc3f255f328e66c3918430f22f0259a7096cda216ecd55f8f5"}}