{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:7TXF2ZL7XPJHK47FDJA3ZE37Q3","short_pith_number":"pith:7TXF2ZL7","schema_version":"1.0","canonical_sha256":"fcee5d657fbbd27573e51a41bc937f86dfad034fb7195f1928f8dc46f938f7a9","source":{"kind":"arxiv","id":"2606.02632","version":1},"attestation_state":"computed","paper":{"title":"Position: Prioritize Identifying Structure, Not Complex Models, for Scientific Discovery","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.AI","cs.CY","cs.LG","econ.EM","stat.AP"],"primary_cat":"stat.ML","authors_text":"Tyler H. McCormick","submitted_at":"2026-05-30T15:21:58Z","abstract_excerpt":"Modern Machine Learning (ML) and Artificial Intelligence (AI) models, especially large language models (LLMs), are increasingly used to generate scientific hypotheses and mechanistic explanations from observational data. This position paper argues that in the high-dimensional proxy regimes where modern ML excels, mechanistic learning is generically underdetermined: many incompatible mechanisms induce essentially the same observational relationships on the support of the data, so predictive success and coherent explanations are insufficient evidence of mechanism discovery. This underdeterminati"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2606.02632","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"stat.ML","submitted_at":"2026-05-30T15:21:58Z","cross_cats_sorted":["cs.AI","cs.CY","cs.LG","econ.EM","stat.AP"],"title_canon_sha256":"20ea5825556a5ad18114ce267b881fd70471fd7ec1c49fc0afe98c17f5abb64b","abstract_canon_sha256":"47d6a73125c56f33c21b1cd19fd25a07155aed6a91bd8c5ba37111154b74d25d"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-03T00:05:05.161727Z","signature_b64":"J1vBa4cyvamLQVfZVXy6RqUSUUZe1bKKNMkymg6i+2Eu3WBWPloRTs9CxseZ0B2fZSWA4zd7J8xlyfe/ODTGCQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"fcee5d657fbbd27573e51a41bc937f86dfad034fb7195f1928f8dc46f938f7a9","last_reissued_at":"2026-06-03T00:05:05.161330Z","signature_status":"signed_v1","first_computed_at":"2026-06-03T00:05:05.161330Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Position: Prioritize Identifying Structure, Not Complex Models, for Scientific Discovery","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.AI","cs.CY","cs.LG","econ.EM","stat.AP"],"primary_cat":"stat.ML","authors_text":"Tyler H. McCormick","submitted_at":"2026-05-30T15:21:58Z","abstract_excerpt":"Modern Machine Learning (ML) and Artificial Intelligence (AI) models, especially large language models (LLMs), are increasingly used to generate scientific hypotheses and mechanistic explanations from observational data. This position paper argues that in the high-dimensional proxy regimes where modern ML excels, mechanistic learning is generically underdetermined: many incompatible mechanisms induce essentially the same observational relationships on the support of the data, so predictive success and coherent explanations are insufficient evidence of mechanism discovery. This underdeterminati"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.02632","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2606.02632/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2606.02632","created_at":"2026-06-03T00:05:05.161390+00:00"},{"alias_kind":"arxiv_version","alias_value":"2606.02632v1","created_at":"2026-06-03T00:05:05.161390+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.02632","created_at":"2026-06-03T00:05:05.161390+00:00"},{"alias_kind":"pith_short_12","alias_value":"7TXF2ZL7XPJH","created_at":"2026-06-03T00:05:05.161390+00:00"},{"alias_kind":"pith_short_16","alias_value":"7TXF2ZL7XPJHK47F","created_at":"2026-06-03T00:05:05.161390+00:00"},{"alias_kind":"pith_short_8","alias_value":"7TXF2ZL7","created_at":"2026-06-03T00:05:05.161390+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/7TXF2ZL7XPJHK47FDJA3ZE37Q3","json":"https://pith.science/pith/7TXF2ZL7XPJHK47FDJA3ZE37Q3.json","graph_json":"https://pith.science/api/pith-number/7TXF2ZL7XPJHK47FDJA3ZE37Q3/graph.json","events_json":"https://pith.science/api/pith-number/7TXF2ZL7XPJHK47FDJA3ZE37Q3/events.json","paper":"https://pith.science/paper/7TXF2ZL7"},"agent_actions":{"view_html":"https://pith.science/pith/7TXF2ZL7XPJHK47FDJA3ZE37Q3","download_json":"https://pith.science/pith/7TXF2ZL7XPJHK47FDJA3ZE37Q3.json","view_paper":"https://pith.science/paper/7TXF2ZL7","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2606.02632&json=true","fetch_graph":"https://pith.science/api/pith-number/7TXF2ZL7XPJHK47FDJA3ZE37Q3/graph.json","fetch_events":"https://pith.science/api/pith-number/7TXF2ZL7XPJHK47FDJA3ZE37Q3/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/7TXF2ZL7XPJHK47FDJA3ZE37Q3/action/timestamp_anchor","attest_storage":"https://pith.science/pith/7TXF2ZL7XPJHK47FDJA3ZE37Q3/action/storage_attestation","attest_author":"https://pith.science/pith/7TXF2ZL7XPJHK47FDJA3ZE37Q3/action/author_attestation","sign_citation":"https://pith.science/pith/7TXF2ZL7XPJHK47FDJA3ZE37Q3/action/citation_signature","submit_replication":"https://pith.science/pith/7TXF2ZL7XPJHK47FDJA3ZE37Q3/action/replication_record"}},"created_at":"2026-06-03T00:05:05.161390+00:00","updated_at":"2026-06-03T00:05:05.161390+00:00"}