{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2023:34EIGVDPISHHSKQRF57ACMSVLE","short_pith_number":"pith:34EIGVDP","canonical_record":{"source":{"id":"2309.08532","kind":"arxiv","version":3},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2023-09-15T16:50:09Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"174d00ad83c4d3ecc6a4a236a8f78a23dee074fdfdc67694d1ed633f9f15383e","abstract_canon_sha256":"2619070d2d25f0a47bd5837141180c0a5938f3c5883f0b1d5b8f57f6b16c3148"},"schema_version":"1.0"},"canonical_sha256":"df0883546f448e792a112f7e0132555929cb85eab255ab70cb9d0cc4a08d53eb","source":{"kind":"arxiv","id":"2309.08532","version":3},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2309.08532","created_at":"2026-05-17T23:38:48Z"},{"alias_kind":"arxiv_version","alias_value":"2309.08532v3","created_at":"2026-05-17T23:38:48Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2309.08532","created_at":"2026-05-17T23:38:48Z"},{"alias_kind":"pith_short_12","alias_value":"34EIGVDPISHH","created_at":"2026-05-18T12:33:33Z"},{"alias_kind":"pith_short_16","alias_value":"34EIGVDPISHHSKQR","created_at":"2026-05-18T12:33:33Z"},{"alias_kind":"pith_short_8","alias_value":"34EIGVDP","created_at":"2026-05-18T12:33:33Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2023:34EIGVDPISHHSKQRF57ACMSVLE","target":"record","payload":{"canonical_record":{"source":{"id":"2309.08532","kind":"arxiv","version":3},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2023-09-15T16:50:09Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"174d00ad83c4d3ecc6a4a236a8f78a23dee074fdfdc67694d1ed633f9f15383e","abstract_canon_sha256":"2619070d2d25f0a47bd5837141180c0a5938f3c5883f0b1d5b8f57f6b16c3148"},"schema_version":"1.0"},"canonical_sha256":"df0883546f448e792a112f7e0132555929cb85eab255ab70cb9d0cc4a08d53eb","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:38:48.874920Z","signature_b64":"M7FpEd1tCrVQkXSO/56vo16d/jE54mw1UTAglVk4JMiS50vvzmYZmv7hSHqKyE/hfXrAbmXUTDMDEBTLaOHuBg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"df0883546f448e792a112f7e0132555929cb85eab255ab70cb9d0cc4a08d53eb","last_reissued_at":"2026-05-17T23:38:48.874329Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:38:48.874329Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2309.08532","source_version":3,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:38:48Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"+xwS26N+7Bcmlh6yGPdNxr0C/mykshiVnq6rAKA+h3PcCNnk7zSPTkMXSqR2I6FQi/9UnoApxUGRxoSqhBbcDA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-31T15:19:18.037505Z"},"content_sha256":"640aca6ef029c85aa547d1ed455dc2fec8c3af36d534e9d3d63cc887b113f8ae","schema_version":"1.0","event_id":"sha256:640aca6ef029c85aa547d1ed455dc2fec8c3af36d534e9d3d63cc887b113f8ae"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2023:34EIGVDPISHHSKQRF57ACMSVLE","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"EvoPrompt: Connecting LLMs with Evolutionary Algorithms Yields Powerful Prompt Optimizers","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"EvoPrompt uses LLMs as evolutionary operators to automatically refine prompts and beat human designs by up to 25 percent on hard benchmarks.","cross_cats":["cs.AI"],"primary_cat":"cs.CL","authors_text":"Bei Li, Guoqing Liu, Jiang Bian, Junliang Guo, Kaitao Song, Qingyan Guo, Rui Wang, Xu Tan, Yujiu Yang","submitted_at":"2023-09-15T16:50:09Z","abstract_excerpt":"Large Language Models (LLMs) excel in various tasks, but they rely on carefully crafted prompts that often demand substantial human effort. To automate this process, in this paper, we propose a novel framework for discrete prompt optimization, called EvoPrompt, which borrows the idea of evolutionary algorithms (EAs) as they exhibit good performance and fast convergence. To enable EAs to work on discrete prompts, which are natural language expressions that need to be coherent and human-readable, we connect LLMs with EAs. This approach allows us to simultaneously leverage the powerful language p"},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"EvoPrompt significantly outperforms human-engineered prompts and existing methods for automatic prompt generation (e.g., up to 25% on BBH).","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"That LLMs can reliably generate coherent, human-readable prompts when acting as evolutionary operators (crossover, mutation) without introducing inconsistencies or quality drift across iterations.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"EvoPrompt uses LLMs to run evolutionary operators on populations of prompts, outperforming human-engineered prompts by up to 25% on BIG-Bench Hard tasks across 31 datasets.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"EvoPrompt uses LLMs as evolutionary operators to automatically refine prompts and beat human designs by up to 25 percent on hard benchmarks.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"a1bcdcd453334b363c44673ba5ae3d825e0b8bcfd9471b8ab3d67d0bf4dd7c58"},"source":{"id":"2309.08532","kind":"arxiv","version":3},"verdict":{"id":"f1413506-22cb-4b64-b700-c9c68db50e65","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-16T06:08:22.712798Z","strongest_claim":"EvoPrompt significantly outperforms human-engineered prompts and existing methods for automatic prompt generation (e.g., up to 25% on BBH).","one_line_summary":"EvoPrompt uses LLMs to run evolutionary operators on populations of prompts, outperforming human-engineered prompts by up to 25% on BIG-Bench Hard tasks across 31 datasets.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"That LLMs can reliably generate coherent, human-readable prompts when acting as evolutionary operators (crossover, mutation) without introducing inconsistencies or quality drift across iterations.","pith_extraction_headline":"EvoPrompt uses LLMs as evolutionary operators to automatically refine prompts and beat human designs by up to 25 percent on hard benchmarks."},"references":{"count":153,"sample":[{"doi":"","year":2020,"title":"Asset: A dataset for tuning and evaluation of sentence simplification models with multiple rewriting transformations","work_id":"00cae906-46c9-4c98-a9f7-17eb167406fe","ref_index":1,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2022,"title":"Promptsource: An integrated development environment and repository for natural language prompts","work_id":"4bf53595-bad7-407f-b926-8a5843b75ed2","ref_index":2,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2006,"title":"Self-adapting control parameters in differential evolution: A comparative study on numerical benchmark problems","work_id":"06670735-31dd-4d65-989a-6d0a20b9cb06","ref_index":3,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":1901,"title":"Language models are few-shot learners","work_id":"b5af3a68-2622-4421-b39b-b1d2fbde2d8d","ref_index":4,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2009,"title":"Introduction to derivative-free optimization","work_id":"7a9b1aac-3f7d-4d23-8717-f38597a33b8e","ref_index":6,"cited_arxiv_id":"","is_internal_anchor":false}],"resolved_work":153,"snapshot_sha256":"6cd30c4fa6464426d7dcdf23bd274c790a541d5fa41faa7027605d10c56b8866","internal_anchors":8},"formal_canon":{"evidence_count":2,"snapshot_sha256":"5423d637a46f2e399a4c65a9a8ec99542fee27284abde21da0feacd309cbd034"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":"f1413506-22cb-4b64-b700-c9c68db50e65"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:38:48Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"YWjaHmcDrPNMNCjmssxQxB6FvdatpFUjZMSOL5ja/Hlf35QisrK3OfTmfFW2xKVjebIBMdCLO0MFc6VNDXigCg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-31T15:19:18.038801Z"},"content_sha256":"404e706cc109b3b2e545a52c053c0b11365eca13ecf4b251832072e9fd4dbe04","schema_version":"1.0","event_id":"sha256:404e706cc109b3b2e545a52c053c0b11365eca13ecf4b251832072e9fd4dbe04"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/34EIGVDPISHHSKQRF57ACMSVLE/bundle.json","state_url":"https://pith.science/pith/34EIGVDPISHHSKQRF57ACMSVLE/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/34EIGVDPISHHSKQRF57ACMSVLE/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-31T15:19:18Z","links":{"resolver":"https://pith.science/pith/34EIGVDPISHHSKQRF57ACMSVLE","bundle":"https://pith.science/pith/34EIGVDPISHHSKQRF57ACMSVLE/bundle.json","state":"https://pith.science/pith/34EIGVDPISHHSKQRF57ACMSVLE/state.json","well_known_bundle":"https://pith.science/.well-known/pith/34EIGVDPISHHSKQRF57ACMSVLE/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2023:34EIGVDPISHHSKQRF57ACMSVLE","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"2619070d2d25f0a47bd5837141180c0a5938f3c5883f0b1d5b8f57f6b16c3148","cross_cats_sorted":["cs.AI"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2023-09-15T16:50:09Z","title_canon_sha256":"174d00ad83c4d3ecc6a4a236a8f78a23dee074fdfdc67694d1ed633f9f15383e"},"schema_version":"1.0","source":{"id":"2309.08532","kind":"arxiv","version":3}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2309.08532","created_at":"2026-05-17T23:38:48Z"},{"alias_kind":"arxiv_version","alias_value":"2309.08532v3","created_at":"2026-05-17T23:38:48Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2309.08532","created_at":"2026-05-17T23:38:48Z"},{"alias_kind":"pith_short_12","alias_value":"34EIGVDPISHH","created_at":"2026-05-18T12:33:33Z"},{"alias_kind":"pith_short_16","alias_value":"34EIGVDPISHHSKQR","created_at":"2026-05-18T12:33:33Z"},{"alias_kind":"pith_short_8","alias_value":"34EIGVDP","created_at":"2026-05-18T12:33:33Z"}],"graph_snapshots":[{"event_id":"sha256:404e706cc109b3b2e545a52c053c0b11365eca13ecf4b251832072e9fd4dbe04","target":"graph","created_at":"2026-05-17T23:38:48Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"EvoPrompt significantly outperforms human-engineered prompts and existing methods for automatic prompt generation (e.g., up to 25% on BBH)."},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"That LLMs can reliably generate coherent, human-readable prompts when acting as evolutionary operators (crossover, mutation) without introducing inconsistencies or quality drift across iterations."},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"EvoPrompt uses LLMs to run evolutionary operators on populations of prompts, outperforming human-engineered prompts by up to 25% on BIG-Bench Hard tasks across 31 datasets."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"EvoPrompt uses LLMs as evolutionary operators to automatically refine prompts and beat human designs by up to 25 percent on hard benchmarks."}],"snapshot_sha256":"a1bcdcd453334b363c44673ba5ae3d825e0b8bcfd9471b8ab3d67d0bf4dd7c58"},"formal_canon":{"evidence_count":2,"snapshot_sha256":"5423d637a46f2e399a4c65a9a8ec99542fee27284abde21da0feacd309cbd034"},"paper":{"abstract_excerpt":"Large Language Models (LLMs) excel in various tasks, but they rely on carefully crafted prompts that often demand substantial human effort. To automate this process, in this paper, we propose a novel framework for discrete prompt optimization, called EvoPrompt, which borrows the idea of evolutionary algorithms (EAs) as they exhibit good performance and fast convergence. To enable EAs to work on discrete prompts, which are natural language expressions that need to be coherent and human-readable, we connect LLMs with EAs. This approach allows us to simultaneously leverage the powerful language p","authors_text":"Bei Li, Guoqing Liu, Jiang Bian, Junliang Guo, Kaitao Song, Qingyan Guo, Rui Wang, Xu Tan, Yujiu Yang","cross_cats":["cs.AI"],"headline":"EvoPrompt uses LLMs as evolutionary operators to automatically refine prompts and beat human designs by up to 25 percent on hard benchmarks.","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2023-09-15T16:50:09Z","title":"EvoPrompt: Connecting LLMs with Evolutionary Algorithms Yields Powerful Prompt Optimizers"},"references":{"count":153,"internal_anchors":8,"resolved_work":153,"sample":[{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":1,"title":"Asset: A dataset for tuning and evaluation of sentence simplification models with multiple rewriting transformations","work_id":"00cae906-46c9-4c98-a9f7-17eb167406fe","year":2020},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":2,"title":"Promptsource: An integrated development environment and repository for natural language prompts","work_id":"4bf53595-bad7-407f-b926-8a5843b75ed2","year":2022},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":3,"title":"Self-adapting control parameters in differential evolution: A comparative study on numerical benchmark problems","work_id":"06670735-31dd-4d65-989a-6d0a20b9cb06","year":2006},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":4,"title":"Language models are few-shot learners","work_id":"b5af3a68-2622-4421-b39b-b1d2fbde2d8d","year":1901},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":6,"title":"Introduction to derivative-free optimization","work_id":"7a9b1aac-3f7d-4d23-8717-f38597a33b8e","year":2009}],"snapshot_sha256":"6cd30c4fa6464426d7dcdf23bd274c790a541d5fa41faa7027605d10c56b8866"},"source":{"id":"2309.08532","kind":"arxiv","version":3},"verdict":{"created_at":"2026-05-16T06:08:22.712798Z","id":"f1413506-22cb-4b64-b700-c9c68db50e65","model_set":{"reader":"grok-4.3"},"one_line_summary":"EvoPrompt uses LLMs to run evolutionary operators on populations of prompts, outperforming human-engineered prompts by up to 25% on BIG-Bench Hard tasks across 31 datasets.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"EvoPrompt uses LLMs as evolutionary operators to automatically refine prompts and beat human designs by up to 25 percent on hard benchmarks.","strongest_claim":"EvoPrompt significantly outperforms human-engineered prompts and existing methods for automatic prompt generation (e.g., up to 25% on BBH).","weakest_assumption":"That LLMs can reliably generate coherent, human-readable prompts when acting as evolutionary operators (crossover, mutation) without introducing inconsistencies or quality drift across iterations."}},"verdict_id":"f1413506-22cb-4b64-b700-c9c68db50e65"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:640aca6ef029c85aa547d1ed455dc2fec8c3af36d534e9d3d63cc887b113f8ae","target":"record","created_at":"2026-05-17T23:38:48Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"2619070d2d25f0a47bd5837141180c0a5938f3c5883f0b1d5b8f57f6b16c3148","cross_cats_sorted":["cs.AI"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2023-09-15T16:50:09Z","title_canon_sha256":"174d00ad83c4d3ecc6a4a236a8f78a23dee074fdfdc67694d1ed633f9f15383e"},"schema_version":"1.0","source":{"id":"2309.08532","kind":"arxiv","version":3}},"canonical_sha256":"df0883546f448e792a112f7e0132555929cb85eab255ab70cb9d0cc4a08d53eb","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"df0883546f448e792a112f7e0132555929cb85eab255ab70cb9d0cc4a08d53eb","first_computed_at":"2026-05-17T23:38:48.874329Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:38:48.874329Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"M7FpEd1tCrVQkXSO/56vo16d/jE54mw1UTAglVk4JMiS50vvzmYZmv7hSHqKyE/hfXrAbmXUTDMDEBTLaOHuBg==","signature_status":"signed_v1","signed_at":"2026-05-17T23:38:48.874920Z","signed_message":"canonical_sha256_bytes"},"source_id":"2309.08532","source_kind":"arxiv","source_version":3}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:640aca6ef029c85aa547d1ed455dc2fec8c3af36d534e9d3d63cc887b113f8ae","sha256:404e706cc109b3b2e545a52c053c0b11365eca13ecf4b251832072e9fd4dbe04"],"state_sha256":"4098a8baaf9beb1b79a4b920f13c52a5098845ef0b859cafe01475879717d560"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"KgUPlHYKktTJE1pOQToIAXqnbPPb6IuzYymKTx/ys3bWkJvAbS9wahydHr4Wv/ypEnWhYD9MwpniTqX+FuG4Cw==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-31T15:19:18.043571Z","bundle_sha256":"6e900061d15e64e841a7c22ef988475f19efc8aa77cd7f7ba27b260e16a043c2"}}