{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2025:Y63FS2OAGCOEFWLD44G4OWFLQ6","short_pith_number":"pith:Y63FS2OA","canonical_record":{"source":{"id":"2511.23473","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2025-11-28T18:58:14Z","cross_cats_sorted":["cs.CL"],"title_canon_sha256":"09102d32bfe37073f0ad52ba834a2f3d00a135de04df752a6a7e4896d5f8a30a","abstract_canon_sha256":"9ec9ccdeadbda093ba29308224755256300c3524026bfc72840d7b0924d8f806"},"schema_version":"1.0"},"canonical_sha256":"c7b65969c0309c42d963e70dc758ab87abd9d741857088a373183e4422af9a11","source":{"kind":"arxiv","id":"2511.23473","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2511.23473","created_at":"2026-05-17T23:38:47Z"},{"alias_kind":"arxiv_version","alias_value":"2511.23473v1","created_at":"2026-05-17T23:38:47Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2511.23473","created_at":"2026-05-17T23:38:47Z"},{"alias_kind":"pith_short_12","alias_value":"Y63FS2OAGCOE","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_16","alias_value":"Y63FS2OAGCOEFWLD","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_8","alias_value":"Y63FS2OA","created_at":"2026-05-18T12:33:37Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2025:Y63FS2OAGCOEFWLD44G4OWFLQ6","target":"record","payload":{"canonical_record":{"source":{"id":"2511.23473","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2025-11-28T18:58:14Z","cross_cats_sorted":["cs.CL"],"title_canon_sha256":"09102d32bfe37073f0ad52ba834a2f3d00a135de04df752a6a7e4896d5f8a30a","abstract_canon_sha256":"9ec9ccdeadbda093ba29308224755256300c3524026bfc72840d7b0924d8f806"},"schema_version":"1.0"},"canonical_sha256":"c7b65969c0309c42d963e70dc758ab87abd9d741857088a373183e4422af9a11","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:38:47.762977Z","signature_b64":"L66J1D098dILH+pxmyg73KmXUvPzutNfexD9zC1glOhMAOByoUGo5gbqJ1TnbzJsEDRvgm3tFyPeQxAYPI2VDg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"c7b65969c0309c42d963e70dc758ab87abd9d741857088a373183e4422af9a11","last_reissued_at":"2026-05-17T23:38:47.762527Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:38:47.762527Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2511.23473","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:38:47Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"+CO8uWrVUHb/5pVreov2KvO0GVZoBMABcXoFxCz4iYsAzEkt2juGib0zjLi40imrrVUnQJsr8weK7GAh6zgaBg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-24T11:40:41.404409Z"},"content_sha256":"ab1c8a084f47f85798a35ada93bef49dadc533fd1f68153b17faa4705813beb2","schema_version":"1.0","event_id":"sha256:ab1c8a084f47f85798a35ada93bef49dadc533fd1f68153b17faa4705813beb2"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2025:Y63FS2OAGCOEFWLD44G4OWFLQ6","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"ThetaEvolve: Test-time Learning on Open Problems","license":"http://creativecommons.org/licenses/by/4.0/","headline":"A small open-source model learns to evolve programs at test time and sets new best-known bounds on open mathematical problems.","cross_cats":["cs.CL"],"primary_cat":"cs.LG","authors_text":"Baolin Peng, Eva Xu, Hao Cheng, Liliang Ren, Luyao Ma, Pengcheng He, Shao-Rong Su, Shuohang Wang, Simon Shaolei Du, Weizhu Chen, Xinyu Yang, Xuehai He, Yelong Shen, Yiping Wang, Zeyi Huang, Zhiyuan Zeng","submitted_at":"2025-11-28T18:58:14Z","abstract_excerpt":"Recent advances in large language models (LLMs) have enabled breakthroughs in mathematical discovery, exemplified by AlphaEvolve, a closed-source system that evolves programs to improve bounds on open problems. However, it relies on ensembles of frontier LLMs to achieve new bounds and is a pure inference system that models cannot internalize the evolving strategies. We introduce ThetaEvolve, an open-source framework that simplifies and extends AlphaEvolve to efficiently scale both in-context learning and Reinforcement Learning (RL) at test time, allowing models to continually learn from their "},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"ThetaEvolve is the first evolving framework that enable a small open-source model, like DeepSeek-R1-0528-Qwen3-8B, to achieve new best-known bounds on open problems (circle packing and first auto-correlation inequality) mentioned in AlphaEvolve.","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"That the observed improvements and cross-task transfer result from the model internalizing evolving strategies via RL rather than from increased total compute, specific hyperparameter choices, or the particular program database construction.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"ThetaEvolve enables small open-source LLMs to achieve new best-known bounds on open problems such as circle packing by combining test-time RL with a large program database and lazy penalties.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"A small open-source model learns to evolve programs at test time and sets new best-known bounds on open mathematical problems.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"3842f90d0d3319dabd070036d119feb2f1279915fcec9b7db0297f48bae72566"},"source":{"id":"2511.23473","kind":"arxiv","version":1},"verdict":{"id":"739918ff-16ab-4cae-aca2-323c4cf152de","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-16T13:11:44.260084Z","strongest_claim":"ThetaEvolve is the first evolving framework that enable a small open-source model, like DeepSeek-R1-0528-Qwen3-8B, to achieve new best-known bounds on open problems (circle packing and first auto-correlation inequality) mentioned in AlphaEvolve.","one_line_summary":"ThetaEvolve enables small open-source LLMs to achieve new best-known bounds on open problems such as circle packing by combining test-time RL with a large program database and lazy penalties.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"That the observed improvements and cross-task transfer result from the model internalizing evolving strategies via RL rather than from increased total compute, specific hyperparameter choices, or the particular program database construction.","pith_extraction_headline":"A small open-source model learns to evolve programs at test time and sets new best-known bounds on open mathematical problems."},"references":{"count":50,"sample":[{"doi":"","year":2024,"title":"Spurious Rewards: Rethinking Training Signals in RLVR","work_id":"8e05ef02-44f0-41ce-aea5-d954f72e9546","ref_index":1,"cited_arxiv_id":"2506.10947","is_internal_anchor":true},{"doi":"","year":null,"title":"The optimal arrangement likely involves variable-sized circles","work_id":"bff7b668-0166-4454-b04d-50daa54823e4","ref_index":2,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":null,"title":"A pure hexagonal arrangement may not be optimal due to edge effects","work_id":"f5600522-95d6-48a0-8634-64fbdd6e50e7","ref_index":3,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":null,"title":"The densest known circle packings often use a hybrid approach","work_id":"73a883d6-f05f-48ff-8c55-4d1e7a75029d","ref_index":4,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":null,"title":"The optimization routine is critically important - simple physics-based models with carefully tuned parameters","work_id":"2c52ab91-8842-4317-b2be-327600b2b6ed","ref_index":5,"cited_arxiv_id":"","is_internal_anchor":false}],"resolved_work":50,"snapshot_sha256":"215da317724ce1ea2f2d5c5c04839142318f0da85434e2feb05b9f6becc80ee8","internal_anchors":2},"formal_canon":{"evidence_count":2,"snapshot_sha256":"4e417a6da4d4f03e08a98973d726ddc4fbccee8c408d53a1a7d02269c7c2c5c6"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":"739918ff-16ab-4cae-aca2-323c4cf152de"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:38:47Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"6Sv/Xznm/dshv9uw6D2VaiLJE6YeQcoqtlEkJIQRgcIZu4/eLu+LJIi5SALEDGj8Uy9QhSASr5TvSehpNPTWBA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-24T11:40:41.405014Z"},"content_sha256":"95222cccf34478947de76562abbdf63334eb9a01e91776e97ace1d8933a2c096","schema_version":"1.0","event_id":"sha256:95222cccf34478947de76562abbdf63334eb9a01e91776e97ace1d8933a2c096"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/Y63FS2OAGCOEFWLD44G4OWFLQ6/bundle.json","state_url":"https://pith.science/pith/Y63FS2OAGCOEFWLD44G4OWFLQ6/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/Y63FS2OAGCOEFWLD44G4OWFLQ6/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-24T11:40:41Z","links":{"resolver":"https://pith.science/pith/Y63FS2OAGCOEFWLD44G4OWFLQ6","bundle":"https://pith.science/pith/Y63FS2OAGCOEFWLD44G4OWFLQ6/bundle.json","state":"https://pith.science/pith/Y63FS2OAGCOEFWLD44G4OWFLQ6/state.json","well_known_bundle":"https://pith.science/.well-known/pith/Y63FS2OAGCOEFWLD44G4OWFLQ6/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2025:Y63FS2OAGCOEFWLD44G4OWFLQ6","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"9ec9ccdeadbda093ba29308224755256300c3524026bfc72840d7b0924d8f806","cross_cats_sorted":["cs.CL"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2025-11-28T18:58:14Z","title_canon_sha256":"09102d32bfe37073f0ad52ba834a2f3d00a135de04df752a6a7e4896d5f8a30a"},"schema_version":"1.0","source":{"id":"2511.23473","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2511.23473","created_at":"2026-05-17T23:38:47Z"},{"alias_kind":"arxiv_version","alias_value":"2511.23473v1","created_at":"2026-05-17T23:38:47Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2511.23473","created_at":"2026-05-17T23:38:47Z"},{"alias_kind":"pith_short_12","alias_value":"Y63FS2OAGCOE","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_16","alias_value":"Y63FS2OAGCOEFWLD","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_8","alias_value":"Y63FS2OA","created_at":"2026-05-18T12:33:37Z"}],"graph_snapshots":[{"event_id":"sha256:95222cccf34478947de76562abbdf63334eb9a01e91776e97ace1d8933a2c096","target":"graph","created_at":"2026-05-17T23:38:47Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"ThetaEvolve is the first evolving framework that enable a small open-source model, like DeepSeek-R1-0528-Qwen3-8B, to achieve new best-known bounds on open problems (circle packing and first auto-correlation inequality) mentioned in AlphaEvolve."},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"That the observed improvements and cross-task transfer result from the model internalizing evolving strategies via RL rather than from increased total compute, specific hyperparameter choices, or the particular program database construction."},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"ThetaEvolve enables small open-source LLMs to achieve new best-known bounds on open problems such as circle packing by combining test-time RL with a large program database and lazy penalties."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"A small open-source model learns to evolve programs at test time and sets new best-known bounds on open mathematical problems."}],"snapshot_sha256":"3842f90d0d3319dabd070036d119feb2f1279915fcec9b7db0297f48bae72566"},"formal_canon":{"evidence_count":2,"snapshot_sha256":"4e417a6da4d4f03e08a98973d726ddc4fbccee8c408d53a1a7d02269c7c2c5c6"},"paper":{"abstract_excerpt":"Recent advances in large language models (LLMs) have enabled breakthroughs in mathematical discovery, exemplified by AlphaEvolve, a closed-source system that evolves programs to improve bounds on open problems. However, it relies on ensembles of frontier LLMs to achieve new bounds and is a pure inference system that models cannot internalize the evolving strategies. We introduce ThetaEvolve, an open-source framework that simplifies and extends AlphaEvolve to efficiently scale both in-context learning and Reinforcement Learning (RL) at test time, allowing models to continually learn from their ","authors_text":"Baolin Peng, Eva Xu, Hao Cheng, Liliang Ren, Luyao Ma, Pengcheng He, Shao-Rong Su, Shuohang Wang, Simon Shaolei Du, Weizhu Chen, Xinyu Yang, Xuehai He, Yelong Shen, Yiping Wang, Zeyi Huang, Zhiyuan Zeng","cross_cats":["cs.CL"],"headline":"A small open-source model learns to evolve programs at test time and sets new best-known bounds on open mathematical problems.","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2025-11-28T18:58:14Z","title":"ThetaEvolve: Test-time Learning on Open Problems"},"references":{"count":50,"internal_anchors":2,"resolved_work":50,"sample":[{"cited_arxiv_id":"2506.10947","doi":"","is_internal_anchor":true,"ref_index":1,"title":"Spurious Rewards: Rethinking Training Signals in RLVR","work_id":"8e05ef02-44f0-41ce-aea5-d954f72e9546","year":2024},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":2,"title":"The optimal arrangement likely involves variable-sized circles","work_id":"bff7b668-0166-4454-b04d-50daa54823e4","year":null},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":3,"title":"A pure hexagonal arrangement may not be optimal due to edge effects","work_id":"f5600522-95d6-48a0-8634-64fbdd6e50e7","year":null},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":4,"title":"The densest known circle packings often use a hybrid approach","work_id":"73a883d6-f05f-48ff-8c55-4d1e7a75029d","year":null},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":5,"title":"The optimization routine is critically important - simple physics-based models with carefully tuned parameters","work_id":"2c52ab91-8842-4317-b2be-327600b2b6ed","year":null}],"snapshot_sha256":"215da317724ce1ea2f2d5c5c04839142318f0da85434e2feb05b9f6becc80ee8"},"source":{"id":"2511.23473","kind":"arxiv","version":1},"verdict":{"created_at":"2026-05-16T13:11:44.260084Z","id":"739918ff-16ab-4cae-aca2-323c4cf152de","model_set":{"reader":"grok-4.3"},"one_line_summary":"ThetaEvolve enables small open-source LLMs to achieve new best-known bounds on open problems such as circle packing by combining test-time RL with a large program database and lazy penalties.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"A small open-source model learns to evolve programs at test time and sets new best-known bounds on open mathematical problems.","strongest_claim":"ThetaEvolve is the first evolving framework that enable a small open-source model, like DeepSeek-R1-0528-Qwen3-8B, to achieve new best-known bounds on open problems (circle packing and first auto-correlation inequality) mentioned in AlphaEvolve.","weakest_assumption":"That the observed improvements and cross-task transfer result from the model internalizing evolving strategies via RL rather than from increased total compute, specific hyperparameter choices, or the particular program database construction."}},"verdict_id":"739918ff-16ab-4cae-aca2-323c4cf152de"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:ab1c8a084f47f85798a35ada93bef49dadc533fd1f68153b17faa4705813beb2","target":"record","created_at":"2026-05-17T23:38:47Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"9ec9ccdeadbda093ba29308224755256300c3524026bfc72840d7b0924d8f806","cross_cats_sorted":["cs.CL"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2025-11-28T18:58:14Z","title_canon_sha256":"09102d32bfe37073f0ad52ba834a2f3d00a135de04df752a6a7e4896d5f8a30a"},"schema_version":"1.0","source":{"id":"2511.23473","kind":"arxiv","version":1}},"canonical_sha256":"c7b65969c0309c42d963e70dc758ab87abd9d741857088a373183e4422af9a11","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"c7b65969c0309c42d963e70dc758ab87abd9d741857088a373183e4422af9a11","first_computed_at":"2026-05-17T23:38:47.762527Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:38:47.762527Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"L66J1D098dILH+pxmyg73KmXUvPzutNfexD9zC1glOhMAOByoUGo5gbqJ1TnbzJsEDRvgm3tFyPeQxAYPI2VDg==","signature_status":"signed_v1","signed_at":"2026-05-17T23:38:47.762977Z","signed_message":"canonical_sha256_bytes"},"source_id":"2511.23473","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:ab1c8a084f47f85798a35ada93bef49dadc533fd1f68153b17faa4705813beb2","sha256:95222cccf34478947de76562abbdf63334eb9a01e91776e97ace1d8933a2c096"],"state_sha256":"93f68a5e6ba1437e477a8d07e0596aed67916c7c36de25d03f40efb92e2ca984"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"gjQ5iV28ORSNO9DTtESmnYfEp46nywAUszU+YrFfcJ5GgZfL7UqdcoQEfxvTNrQB5Lwa1KA9Ina0iuiOyV37BQ==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-24T11:40:41.407996Z","bundle_sha256":"6b5f88d77b529267c7636d4d046d837e7822179a7165f6d3f496b43a219bfbba"}}