{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:OEHZB7YYIGFOAXVLRBJ7DTUQQI","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"3cc3b9ea1441ccd14fd17d5146aa60348e9a7716a07f92b267e2edc1810136d7","cross_cats_sorted":["cs.AI","cs.LG"],"license":"http://creativecommons.org/licenses/by-nc-nd/4.0/","primary_cat":"cs.SC","submitted_at":"2026-05-12T20:04:59Z","title_canon_sha256":"f0d8dc883b7455148ae50828d219beb5f0cdff82fd651725148346e99300a723"},"schema_version":"1.0","source":{"id":"2605.12704","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.12704","created_at":"2026-05-18T03:09:49Z"},{"alias_kind":"arxiv_version","alias_value":"2605.12704v1","created_at":"2026-05-18T03:09:49Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.12704","created_at":"2026-05-18T03:09:49Z"},{"alias_kind":"pith_short_12","alias_value":"OEHZB7YYIGFO","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_16","alias_value":"OEHZB7YYIGFOAXVL","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_8","alias_value":"OEHZB7YY","created_at":"2026-05-18T12:33:37Z"}],"graph_snapshots":[{"event_id":"sha256:6caf88b28d5c19f9e9b2d635c12de816f2d89a9b3ccf3f5e5fef26481e953df4","target":"graph","created_at":"2026-05-18T03:09:49Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"Across five standard benchmarks, FePySR outperforms state-of-the-art methods by achieving higher equation recovery rates. On a set of 75 highly complex synthesized equations, FePySR recovers 36 equations, while producing substantially smaller mean squared errors on the remaining unrecovered cases, with reduced computation time compared to PySR. Applied to ordinary differential equations governing biological systems, FePySR successfully identifies governing equations in 24 out of 100 tests where PySR recovers none."},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"That observational data can be reliably constrained by the heterogeneous neural network to a set of valid candidate expressions without systematically excluding critical nonlinear modules or introducing many invalid ones that still expand the search space."},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"FePySR uses a neural network to pre-extract valid features before PySR search, recovering more equations than baselines on benchmarks and identifying governing ODEs in 24 of 100 biological cases where PySR finds none."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"A neural network first extracts candidate features to shrink the search space for symbolic regression, recovering more complex equations than direct search."}],"snapshot_sha256":"02b90951a4d73c60c0ba9f4c55f523a451660926346a72e62af287d1e5efd749"},"formal_canon":{"evidence_count":2,"snapshot_sha256":"9f06bce5694adf4b6da5893efa55d919b7e245482e8114e77a5d4d641d53ee3c"},"paper":{"abstract_excerpt":"A fundamental challenge in symbolic regression (SR) is efficiently recovering complex mathematical expressions from observational data. Although this problem is NP-hard, many expressions of practical interest decompose naturally into combinations of nonlinear feature modules, concentrating structural complexity into a small number of reusable components. Here, we introduce FePySR, a two-stage framework that reduces the SR search space by extracting valid features prior to equation search. FePySR first employs a heterogeneous neural network to constrain observational data to a set of candidate ","authors_text":"Wangtao Lu, Xin Lai, Zhiming Yu","cross_cats":["cs.AI","cs.LG"],"headline":"A neural network first extracts candidate features to shrink the search space for symbolic regression, recovering more complex equations than direct search.","license":"http://creativecommons.org/licenses/by-nc-nd/4.0/","primary_cat":"cs.SC","submitted_at":"2026-05-12T20:04:59Z","title":"FePySR: A Neural Feature Extraction Framework for Efficient and Scalable Symbolic Regression"},"references":{"count":43,"internal_anchors":1,"resolved_work":43,"sample":[{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":1,"title":"MarcoVirgolinandSolonP.Pissis. SymbolicregressionisNP-hard.TransactionsonMachineLearning Research, 2022","work_id":"1d53026d-ed3e-48d1-ab7e-8273aac1008a","year":2022},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":2,"title":"Prove symbolic regression is NP-hard by symbol graph.arXiv preprint arXiv:2404.13820, 2024","work_id":"7e77eb2e-9efe-42d9-8a81-57417c2ee82b","year":2024},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":3,"title":"Koza.Genetic programming 2 - automatic discovery of reusable programs","work_id":"a89410dc-12c4-4de5-bc3e-3e2ef2ee9027","year":1994},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":4,"title":"Distilling free-form natural laws from experimental data.Science, 324(5923):81–85, 2009","work_id":"8a2b5363-2379-4ab9-a80c-6264e6be74df","year":2009},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":5,"title":"La Cava, Lee Spector, and Kourosh Danai","work_id":"8cbe2be5-9920-4f0e-ae2e-d058f351a2cd","year":2016}],"snapshot_sha256":"9d636f383c45bc0e5d617fa2137e6c3cd4d674dabf794e510102b4ba44e2aff0"},"source":{"id":"2605.12704","kind":"arxiv","version":1},"verdict":{"created_at":"2026-05-14T20:46:54.432175Z","id":"1683bfd5-fbf6-4a96-bafd-977972194671","model_set":{"reader":"grok-4.3"},"one_line_summary":"FePySR uses a neural network to pre-extract valid features before PySR search, recovering more equations than baselines on benchmarks and identifying governing ODEs in 24 of 100 biological cases where PySR finds none.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"A neural network first extracts candidate features to shrink the search space for symbolic regression, recovering more complex equations than direct search.","strongest_claim":"Across five standard benchmarks, FePySR outperforms state-of-the-art methods by achieving higher equation recovery rates. On a set of 75 highly complex synthesized equations, FePySR recovers 36 equations, while producing substantially smaller mean squared errors on the remaining unrecovered cases, with reduced computation time compared to PySR. Applied to ordinary differential equations governing biological systems, FePySR successfully identifies governing equations in 24 out of 100 tests where PySR recovers none.","weakest_assumption":"That observational data can be reliably constrained by the heterogeneous neural network to a set of valid candidate expressions without systematically excluding critical nonlinear modules or introducing many invalid ones that still expand the search space."}},"verdict_id":"1683bfd5-fbf6-4a96-bafd-977972194671"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:ea4a13bbfabe82f2247b2d4c4708e7b20d7b8c06f9d1133dde5697a96a165090","target":"record","created_at":"2026-05-18T03:09:49Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"3cc3b9ea1441ccd14fd17d5146aa60348e9a7716a07f92b267e2edc1810136d7","cross_cats_sorted":["cs.AI","cs.LG"],"license":"http://creativecommons.org/licenses/by-nc-nd/4.0/","primary_cat":"cs.SC","submitted_at":"2026-05-12T20:04:59Z","title_canon_sha256":"f0d8dc883b7455148ae50828d219beb5f0cdff82fd651725148346e99300a723"},"schema_version":"1.0","source":{"id":"2605.12704","kind":"arxiv","version":1}},"canonical_sha256":"710f90ff18418ae05eab8853f1ce908220d967344cd89f2fe0d174887fb028c3","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"710f90ff18418ae05eab8853f1ce908220d967344cd89f2fe0d174887fb028c3","first_computed_at":"2026-05-18T03:09:49.642788Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T03:09:49.642788Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"EYfQs7C7uFN9HCB89GJvlxqoeYvf0pcYRkDZF1su25eLFII7Hl275jJy4Y/e4GMMrs1+oT2lDHTXpHP0x+MeAQ==","signature_status":"signed_v1","signed_at":"2026-05-18T03:09:49.643751Z","signed_message":"canonical_sha256_bytes"},"source_id":"2605.12704","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:ea4a13bbfabe82f2247b2d4c4708e7b20d7b8c06f9d1133dde5697a96a165090","sha256:6caf88b28d5c19f9e9b2d635c12de816f2d89a9b3ccf3f5e5fef26481e953df4"],"state_sha256":"606fe9364fa157d31fd6c6b56b088cc31ea43cc04985c7b9ce8acd3ba2225644"}