{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2023:JMJJNPZBO6PRVNC2QRNEBSWN33","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"72442a4596cb224cd8ca06a06d4d596f0073c6c055ab633d35e6e856a0f40a39","cross_cats_sorted":["cs.AI","cs.LG","cs.NE"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2023-09-28T19:01:07Z","title_canon_sha256":"4244f6eb42bf5cfaa0a8ca2bc7803a3815d151f208b15816561832a5396f68b0"},"schema_version":"1.0","source":{"id":"2309.16797","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2309.16797","created_at":"2026-05-17T23:38:48Z"},{"alias_kind":"arxiv_version","alias_value":"2309.16797v1","created_at":"2026-05-17T23:38:48Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2309.16797","created_at":"2026-05-17T23:38:48Z"},{"alias_kind":"pith_short_12","alias_value":"JMJJNPZBO6PR","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_16","alias_value":"JMJJNPZBO6PRVNC2","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_8","alias_value":"JMJJNPZB","created_at":"2026-05-18T12:33:37Z"}],"graph_snapshots":[{"event_id":"sha256:6f9737fa584200f5b65e7c9b0141ae6af5e055328420a9035b95e5c27e6fb5e9","target":"graph","created_at":"2026-05-17T23:38:48Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"Promptbreeder outperforms state-of-the-art prompt strategies such as Chain-of-Thought and Plan-and-Solve Prompting on commonly used arithmetic and commonsense reasoning benchmarks. Furthermore, Promptbreeder is able to evolve intricate task-prompts for the challenging problem of hate speech classification."},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"That the LLM can generate useful mutations and provide reliable fitness evaluations on a training set without systematic biases or errors that would derail the evolutionary process."},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"Promptbreeder evolves both task prompts and the mutation prompts that improve them using LLMs, outperforming Chain-of-Thought and Plan-and-Solve on arithmetic and commonsense reasoning benchmarks."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"An LLM can improve prompting by evolving both the task prompts and the mutation rules that generate them."}],"snapshot_sha256":"af8a4608b93492c9a639316a2a8363210f11448aa1c8d3fcc9d2985291fd12a3"},"formal_canon":{"evidence_count":2,"snapshot_sha256":"34973332bdf97d1893a8162d3a15016cc6de881333fbca73ea85f05de7f36b4e"},"paper":{"abstract_excerpt":"Popular prompt strategies like Chain-of-Thought Prompting can dramatically improve the reasoning abilities of Large Language Models (LLMs) in various domains. However, such hand-crafted prompt-strategies are often sub-optimal. In this paper, we present Promptbreeder, a general-purpose self-referential self-improvement mechanism that evolves and adapts prompts for a given domain. Driven by an LLM, Promptbreeder mutates a population of task-prompts, and subsequently evaluates them for fitness on a training set. Crucially, the mutation of these task-prompts is governed by mutation-prompts that th","authors_text":"Chrisantha Fernando, Dylan Banarse, Henryk Michalewski, Simon Osindero, Tim Rockt\\\"aschel","cross_cats":["cs.AI","cs.LG","cs.NE"],"headline":"An LLM can improve prompting by evolving both the task prompts and the mutation rules that generate them.","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2023-09-28T19:01:07Z","title":"Promptbreeder: Self-Referential Self-Improvement Via Prompt Evolution"},"references":{"count":296,"internal_anchors":67,"resolved_work":296,"sample":[{"cited_arxiv_id":"2112.00114","doi":"","is_internal_anchor":true,"ref_index":1,"title":"Show Your Work: Scratchpads for Intermediate Computation with Language Models","work_id":"a05b1e60-8e76-4f26-9bea-28927a5f8620","year":2021},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":2,"title":"The Hitchhiker's Guide to the Galaxy , author=. 1995 , publisher=","work_id":"07683f0c-cb34-47d6-83ae-f5d0726ac43a","year":1995},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":3,"title":"NeurIPS , year =","work_id":"387c2ec4-3205-43fa-9107-bd3febe774bc","year":null},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":5,"title":"The Eleventh International Conference on Learning Representations,","work_id":"399e38b9-d994-4207-a188-550020e608cf","year":2023},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":6,"title":"gradient descent","work_id":"8f5910e5-bea1-4761-87ec-05f692dd6f04","year":null}],"snapshot_sha256":"db2d6045761ca65dfd1b0fc1282cac50449235add2e3e1a7df7c663909c2df89"},"source":{"id":"2309.16797","kind":"arxiv","version":1},"verdict":{"created_at":"2026-05-16T08:08:25.069041Z","id":"7356eb50-875e-4a35-873b-bf88b81cd52d","model_set":{"reader":"grok-4.3"},"one_line_summary":"Promptbreeder evolves both task prompts and the mutation prompts that improve them using LLMs, outperforming Chain-of-Thought and Plan-and-Solve on arithmetic and commonsense reasoning benchmarks.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"An LLM can improve prompting by evolving both the task prompts and the mutation rules that generate them.","strongest_claim":"Promptbreeder outperforms state-of-the-art prompt strategies such as Chain-of-Thought and Plan-and-Solve Prompting on commonly used arithmetic and commonsense reasoning benchmarks. Furthermore, Promptbreeder is able to evolve intricate task-prompts for the challenging problem of hate speech classification.","weakest_assumption":"That the LLM can generate useful mutations and provide reliable fitness evaluations on a training set without systematic biases or errors that would derail the evolutionary process."}},"verdict_id":"7356eb50-875e-4a35-873b-bf88b81cd52d"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:fab1d801c9d9a88e32b6a3b7321f6bce5a28b349fda10b9e5c3ef38f2c042572","target":"record","created_at":"2026-05-17T23:38:48Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"72442a4596cb224cd8ca06a06d4d596f0073c6c055ab633d35e6e856a0f40a39","cross_cats_sorted":["cs.AI","cs.LG","cs.NE"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2023-09-28T19:01:07Z","title_canon_sha256":"4244f6eb42bf5cfaa0a8ca2bc7803a3815d151f208b15816561832a5396f68b0"},"schema_version":"1.0","source":{"id":"2309.16797","kind":"arxiv","version":1}},"canonical_sha256":"4b1296bf21779f1ab45a845a40cacddeff4d6a06eb4c42057b7fc3ddd4e5c667","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"4b1296bf21779f1ab45a845a40cacddeff4d6a06eb4c42057b7fc3ddd4e5c667","first_computed_at":"2026-05-17T23:38:48.545777Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:38:48.545777Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"8T7YanHoer+tfURCPgaHhzLNVbqg7q4VHvsz9dGqwnv6l3OCe537XBVMQZ9CZvhBEt2ILmrZThH0i5nm54yLAw==","signature_status":"signed_v1","signed_at":"2026-05-17T23:38:48.546255Z","signed_message":"canonical_sha256_bytes"},"source_id":"2309.16797","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:fab1d801c9d9a88e32b6a3b7321f6bce5a28b349fda10b9e5c3ef38f2c042572","sha256:6f9737fa584200f5b65e7c9b0141ae6af5e055328420a9035b95e5c27e6fb5e9"],"state_sha256":"37ca41c2a62e69c0eac54095898572da627ac84b6457fc7b1e91e374f32e67c0"}