{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:3JNQYK2S4I7WI4PFLAQSS34ZUR","short_pith_number":"pith:3JNQYK2S","canonical_record":{"source":{"id":"2604.26498","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-04-29T10:01:16Z","cross_cats_sorted":["q-bio.QM"],"title_canon_sha256":"e9b2b6c7870d2c326f35efcd8a570b734b63e70cbab790c4ffccb8165d2683fb","abstract_canon_sha256":"68f33d041722283f2502d820cb08e271088a761ed6bb332dd7e8585e5f5012a8"},"schema_version":"1.0"},"canonical_sha256":"da5b0c2b52e23f6471e55821296f99a46da4ca73fb416bcd32bdc54cec0ed4c3","source":{"kind":"arxiv","id":"2604.26498","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2604.26498","created_at":"2026-05-20T00:00:39Z"},{"alias_kind":"arxiv_version","alias_value":"2604.26498v2","created_at":"2026-05-20T00:00:39Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2604.26498","created_at":"2026-05-20T00:00:39Z"},{"alias_kind":"pith_short_12","alias_value":"3JNQYK2S4I7W","created_at":"2026-05-20T00:00:39Z"},{"alias_kind":"pith_short_16","alias_value":"3JNQYK2S4I7WI4PF","created_at":"2026-05-20T00:00:39Z"},{"alias_kind":"pith_short_8","alias_value":"3JNQYK2S","created_at":"2026-05-20T00:00:39Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:3JNQYK2S4I7WI4PFLAQSS34ZUR","target":"record","payload":{"canonical_record":{"source":{"id":"2604.26498","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-04-29T10:01:16Z","cross_cats_sorted":["q-bio.QM"],"title_canon_sha256":"e9b2b6c7870d2c326f35efcd8a570b734b63e70cbab790c4ffccb8165d2683fb","abstract_canon_sha256":"68f33d041722283f2502d820cb08e271088a761ed6bb332dd7e8585e5f5012a8"},"schema_version":"1.0"},"canonical_sha256":"da5b0c2b52e23f6471e55821296f99a46da4ca73fb416bcd32bdc54cec0ed4c3","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-20T00:00:39.586971Z","signature_b64":"n7RBwrjmbxBDciCSOzlWh8xqyyx57527yTJ3K7MZ16XuFWEvUEDRyDYVc9vzC0Xuzw00SsmPVEMqTomxb8wTCA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"da5b0c2b52e23f6471e55821296f99a46da4ca73fb416bcd32bdc54cec0ed4c3","last_reissued_at":"2026-05-20T00:00:39.586358Z","signature_status":"signed_v1","first_computed_at":"2026-05-20T00:00:39.586358Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2604.26498","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-20T00:00:39Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"MopJ415rTZ5mJzva3dsHxJky0fUlPCOZPb9u9U4jabhXnbra4rx7ffpCn7mLC/9QuyzTXBbPQ5C3gWeM9HJfDw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-21T22:34:07.675448Z"},"content_sha256":"dd76ddc2454663ddcf0385b75e4933b6ea2f1b79163822325caee28a250d4695","schema_version":"1.0","event_id":"sha256:dd76ddc2454663ddcf0385b75e4933b6ea2f1b79163822325caee28a250d4695"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:3JNQYK2S4I7WI4PFLAQSS34ZUR","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Do Larger Models Really Win in Drug Discovery? A Benchmark Assessment of Model Scaling in AI-Driven Molecular Property and Activity Prediction","license":"http://creativecommons.org/licenses/by/4.0/","headline":"Classical ML models outperform larger pretrained and LLM approaches in most molecular prediction tasks for drug discovery","cross_cats":["q-bio.QM"],"primary_cat":"cs.LG","authors_text":"Jinjiang Guo","submitted_at":"2026-04-29T10:01:16Z","abstract_excerpt":"The rapid growth of molecular foundation models and large language models has encouraged a scale centred view of AI in drug discovery, in which larger pretrained models are expected to supersede compact cheminformatics models and graph neural networks (GNNs) trained for individual tasks. We test this assumption across 26 endpoints for molecular properties, toxicity, safety liabilities and biological activity, grouped into ADME, toxicity and bioactivity classes. The benchmark contains 78 endpoint and split entries spanning random, Murcko scaffold and structure separated 5-fold CV. Ordered from "},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"Across 156 fold mean comparisons, classical ML such as RF(ECFP4) and ExtraTrees(RDKit) win 116, GNNs such as GIN and Ligandformer win 25, pretrained sequence models such as MoLFormer and ChemBERTa2 win 12, and LLM based SAR baselines win three. Compact specialized models remain highly effective for molecular property and activity prediction.","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"The 78 endpoint and split entries, grouped into ADME, toxicity and bioactivity classes and using random, Murcko scaffold, and structure-separated 5-fold CV, adequately represent the spectrum of real-world drug discovery challenges from closed-library retrospective evaluation to novel chemotype library expansion.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"A benchmark across 156 comparisons finds classical ML models win 116 times while larger pretrained and LLM models win far fewer, showing predictive performance depends on model-task fit rather than scale.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"Classical ML models outperform larger pretrained and LLM approaches in most molecular prediction tasks for drug discovery","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"a5c4ee65720c211475af8ac09dd226dc1d6c80d055baee19d060a0deefe08035"},"source":{"id":"2604.26498","kind":"arxiv","version":2},"verdict":{"id":"bdc3f0c5-9c2b-48af-89c3-7196ab74ee04","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-19T17:37:02.926593Z","strongest_claim":"Across 156 fold mean comparisons, classical ML such as RF(ECFP4) and ExtraTrees(RDKit) win 116, GNNs such as GIN and Ligandformer win 25, pretrained sequence models such as MoLFormer and ChemBERTa2 win 12, and LLM based SAR baselines win three. Compact specialized models remain highly effective for molecular property and activity prediction.","one_line_summary":"A benchmark across 156 comparisons finds classical ML models win 116 times while larger pretrained and LLM models win far fewer, showing predictive performance depends on model-task fit rather than scale.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"The 78 endpoint and split entries, grouped into ADME, toxicity and bioactivity classes and using random, Murcko scaffold, and structure-separated 5-fold CV, adequately represent the spectrum of real-world drug discovery challenges from closed-library retrospective evaluation to novel chemotype library expansion.","pith_extraction_headline":"Classical ML models outperform larger pretrained and LLM approaches in most molecular prediction tasks for drug discovery"},"integrity":{"clean":false,"summary":{"advisory":0,"critical":1,"by_detector":{"doi_compliance":{"total":1,"advisory":0,"critical":1,"informational":0}},"informational":0},"endpoint":"/pith/2604.26498/integrity.json","findings":[{"note":"Identifier '10.3389/fenvs.2015.00085/full' is syntactically valid but the DOI registry (doi.org) returned 404, and Crossref / OpenAlex / internal corpus also have no record. The cited work could not be located through any authoritative source.","detector":"doi_compliance","severity":"critical","ref_index":11,"audited_at":"2026-05-19T20:04:03.577416Z","detected_doi":"10.3389/fenvs.2015.00085/full","finding_type":"unresolvable_identifier","verdict_class":"cross_source","detected_arxiv_id":null}],"available":true,"detectors_run":[{"name":"doi_compliance","ran_at":"2026-05-19T20:04:03.577416Z","status":"completed","version":"1.0.0","findings_count":1}],"snapshot_sha256":"384af60205d1fb91b8b41e1b9acc78a9689348640a9f594073d8bf33a901d36c"},"references":{"count":34,"sample":[{"doi":"10.1039/c7sc02664a","year":2018,"title":"Feinberg, Evan and Gomes, Joseph and Geniesse, Caleb and S","work_id":"6351d72d-6135-4484-afc8-abd85cfaa043","ref_index":1,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2021,"title":"Coley, Cao Xiao, Jimeng Sun, and Marinka Zitnik","work_id":"41bb435a-b447-45e1-afdc-6b08deb86f61","ref_index":2,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2021,"title":"Bronskill, Krzysztof Maziarz, Henryk Misztela, Julien Lanini, Marwin Segler, Nadine Schneider, and Marc Brockschmidt","work_id":"da7eb9aa-b681-43aa-9758-d13e82b152aa","ref_index":3,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2023,"title":"Limitations of representation learning in small molecule property prediction.Nature Communications, 14:6394, 2023","work_id":"cfd6caef-cea8-491c-9a1d-372fd8b9e9c1","ref_index":4,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2023,"title":"Jun Xia, Lecheng Zhang, Xiao Zhu, and Stan Z. Li. Why deep models often cannot beat non-deep counterparts on molecular property prediction?, 2023. URLhttps://arxiv.org/ abs/2306.17702","work_id":"28866309-c9a0-4297-a2e0-2f94079eea8f","ref_index":5,"cited_arxiv_id":"","is_internal_anchor":false}],"resolved_work":34,"snapshot_sha256":"290bca46c5922c3f63466a87e82702707be74b36fbb97eab4476ab5c356a29a2","internal_anchors":4},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":"bdc3f0c5-9c2b-48af-89c3-7196ab74ee04"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-20T00:00:39Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"asDHMLjiHrtWMoRceT3bTmne2UdVxQKodDSoPY/C6BfhqpB1jVlst2+u17yHXI2M9sqJWqoPchJvQW6mn+WnDQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-21T22:34:07.676064Z"},"content_sha256":"d4724885921290692c61c7b79e1851ec0bb25f5a9536c4a7dcff3ccaeaf41286","schema_version":"1.0","event_id":"sha256:d4724885921290692c61c7b79e1851ec0bb25f5a9536c4a7dcff3ccaeaf41286"},{"event_type":"integrity_finding","subject_pith_number":"pith:2026:3JNQYK2S4I7WI4PFLAQSS34ZUR","target":"integrity","payload":{"note":"Identifier '10.3389/fenvs.2015.00085/full' is syntactically valid but the DOI registry (doi.org) returned 404, and Crossref / OpenAlex / internal corpus also have no record. The cited work could not be located through any authoritative source.","snippet":"URLhttps://www.frontiersin.org/journals/environmental-science/articles/ 10.3389/fenvs.2015.00085/full","arxiv_id":"2604.26498","detector":"doi_compliance","evidence":{"doi":"10.3389/fenvs.2015.00085/full","arxiv_id":null,"ref_index":11,"raw_excerpt":"URLhttps://www.frontiersin.org/journals/environmental-science/articles/ 10.3389/fenvs.2015.00085/full","verdict_class":"cross_source","checked_sources":["crossref_by_doi","openalex_by_doi","doi_org_head"]},"severity":"critical","ref_index":11,"audited_at":"2026-05-19T20:04:03.577416Z","event_type":"pith.integrity.v1","detected_doi":"10.3389/fenvs.2015.00085/full","detector_url":"https://pith.science/pith-integrity-protocol#doi_compliance","external_url":null,"finding_type":"unresolvable_identifier","evidence_hash":"376f78c24d53c7743473925aded0adffe41c801a9a91da302539449d0db66128","paper_version":2,"verdict_class":"cross_source","resolved_title":null,"detector_version":"1.0.0","detected_arxiv_id":null,"integrity_event_id":2824,"payload_sha256":"17a98cea1b96c4ea71ae2a01f6e83dbe12552a3197d3e4b724b996a86c3e7c57","signature_b64":"w4jHUn0t8qLApmMNeVoA/adfxRQgWLLaFkLpcnvLzBgOTnoP2J0pucA3EaZPyPebT1J82EfgcQuSOkRQx/SICA==","signing_key_id":"pith-v1-2026-05"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-19T20:07:20Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"aapsGmJ0R9e1YXkyWCDR8+mR4gheGZCJ8YMLoJCZ6gKLOF0cVF4ckSbTZYbksspetnIu0J9tM0qjne6z2cTvCQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-21T22:34:07.676975Z"},"content_sha256":"bef19ee3810aa64540500b195131f25903ca9ba8a959c1dde05d019b511053c7","schema_version":"1.0","event_id":"sha256:bef19ee3810aa64540500b195131f25903ca9ba8a959c1dde05d019b511053c7"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/3JNQYK2S4I7WI4PFLAQSS34ZUR/bundle.json","state_url":"https://pith.science/pith/3JNQYK2S4I7WI4PFLAQSS34ZUR/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/3JNQYK2S4I7WI4PFLAQSS34ZUR/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-21T22:34:07Z","links":{"resolver":"https://pith.science/pith/3JNQYK2S4I7WI4PFLAQSS34ZUR","bundle":"https://pith.science/pith/3JNQYK2S4I7WI4PFLAQSS34ZUR/bundle.json","state":"https://pith.science/pith/3JNQYK2S4I7WI4PFLAQSS34ZUR/state.json","well_known_bundle":"https://pith.science/.well-known/pith/3JNQYK2S4I7WI4PFLAQSS34ZUR/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:3JNQYK2S4I7WI4PFLAQSS34ZUR","merge_version":"pith-open-graph-merge-v1","event_count":3,"valid_event_count":3,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"68f33d041722283f2502d820cb08e271088a761ed6bb332dd7e8585e5f5012a8","cross_cats_sorted":["q-bio.QM"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-04-29T10:01:16Z","title_canon_sha256":"e9b2b6c7870d2c326f35efcd8a570b734b63e70cbab790c4ffccb8165d2683fb"},"schema_version":"1.0","source":{"id":"2604.26498","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2604.26498","created_at":"2026-05-20T00:00:39Z"},{"alias_kind":"arxiv_version","alias_value":"2604.26498v2","created_at":"2026-05-20T00:00:39Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2604.26498","created_at":"2026-05-20T00:00:39Z"},{"alias_kind":"pith_short_12","alias_value":"3JNQYK2S4I7W","created_at":"2026-05-20T00:00:39Z"},{"alias_kind":"pith_short_16","alias_value":"3JNQYK2S4I7WI4PF","created_at":"2026-05-20T00:00:39Z"},{"alias_kind":"pith_short_8","alias_value":"3JNQYK2S","created_at":"2026-05-20T00:00:39Z"}],"graph_snapshots":[{"event_id":"sha256:d4724885921290692c61c7b79e1851ec0bb25f5a9536c4a7dcff3ccaeaf41286","target":"graph","created_at":"2026-05-20T00:00:39Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"Across 156 fold mean comparisons, classical ML such as RF(ECFP4) and ExtraTrees(RDKit) win 116, GNNs such as GIN and Ligandformer win 25, pretrained sequence models such as MoLFormer and ChemBERTa2 win 12, and LLM based SAR baselines win three. Compact specialized models remain highly effective for molecular property and activity prediction."},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"The 78 endpoint and split entries, grouped into ADME, toxicity and bioactivity classes and using random, Murcko scaffold, and structure-separated 5-fold CV, adequately represent the spectrum of real-world drug discovery challenges from closed-library retrospective evaluation to novel chemotype library expansion."},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"A benchmark across 156 comparisons finds classical ML models win 116 times while larger pretrained and LLM models win far fewer, showing predictive performance depends on model-task fit rather than scale."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"Classical ML models outperform larger pretrained and LLM approaches in most molecular prediction tasks for drug discovery"}],"snapshot_sha256":"a5c4ee65720c211475af8ac09dd226dc1d6c80d055baee19d060a0deefe08035"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":false,"detectors_run":[{"findings_count":1,"name":"doi_compliance","ran_at":"2026-05-19T20:04:03.577416Z","status":"completed","version":"1.0.0"}],"endpoint":"/pith/2604.26498/integrity.json","findings":[{"audited_at":"2026-05-19T20:04:03.577416Z","detected_arxiv_id":null,"detected_doi":"10.3389/fenvs.2015.00085/full","detector":"doi_compliance","finding_type":"unresolvable_identifier","note":"Identifier '10.3389/fenvs.2015.00085/full' is syntactically valid but the DOI registry (doi.org) returned 404, and Crossref / OpenAlex / internal corpus also have no record. The cited work could not be located through any authoritative source.","ref_index":11,"severity":"critical","verdict_class":"cross_source"}],"snapshot_sha256":"384af60205d1fb91b8b41e1b9acc78a9689348640a9f594073d8bf33a901d36c","summary":{"advisory":0,"by_detector":{"doi_compliance":{"advisory":0,"critical":1,"informational":0,"total":1}},"critical":1,"informational":0}},"paper":{"abstract_excerpt":"The rapid growth of molecular foundation models and large language models has encouraged a scale centred view of AI in drug discovery, in which larger pretrained models are expected to supersede compact cheminformatics models and graph neural networks (GNNs) trained for individual tasks. We test this assumption across 26 endpoints for molecular properties, toxicity, safety liabilities and biological activity, grouped into ADME, toxicity and bioactivity classes. The benchmark contains 78 endpoint and split entries spanning random, Murcko scaffold and structure separated 5-fold CV. Ordered from ","authors_text":"Jinjiang Guo","cross_cats":["q-bio.QM"],"headline":"Classical ML models outperform larger pretrained and LLM approaches in most molecular prediction tasks for drug discovery","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-04-29T10:01:16Z","title":"Do Larger Models Really Win in Drug Discovery? A Benchmark Assessment of Model Scaling in AI-Driven Molecular Property and Activity Prediction"},"references":{"count":34,"internal_anchors":4,"resolved_work":34,"sample":[{"cited_arxiv_id":"","doi":"10.1039/c7sc02664a","is_internal_anchor":false,"ref_index":1,"title":"Feinberg, Evan and Gomes, Joseph and Geniesse, Caleb and S","work_id":"6351d72d-6135-4484-afc8-abd85cfaa043","year":2018},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":2,"title":"Coley, Cao Xiao, Jimeng Sun, and Marinka Zitnik","work_id":"41bb435a-b447-45e1-afdc-6b08deb86f61","year":2021},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":3,"title":"Bronskill, Krzysztof Maziarz, Henryk Misztela, Julien Lanini, Marwin Segler, Nadine Schneider, and Marc Brockschmidt","work_id":"da7eb9aa-b681-43aa-9758-d13e82b152aa","year":2021},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":4,"title":"Limitations of representation learning in small molecule property prediction.Nature Communications, 14:6394, 2023","work_id":"cfd6caef-cea8-491c-9a1d-372fd8b9e9c1","year":2023},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":5,"title":"Jun Xia, Lecheng Zhang, Xiao Zhu, and Stan Z. Li. Why deep models often cannot beat non-deep counterparts on molecular property prediction?, 2023. URLhttps://arxiv.org/ abs/2306.17702","work_id":"28866309-c9a0-4297-a2e0-2f94079eea8f","year":2023}],"snapshot_sha256":"290bca46c5922c3f63466a87e82702707be74b36fbb97eab4476ab5c356a29a2"},"source":{"id":"2604.26498","kind":"arxiv","version":2},"verdict":{"created_at":"2026-05-19T17:37:02.926593Z","id":"bdc3f0c5-9c2b-48af-89c3-7196ab74ee04","model_set":{"reader":"grok-4.3"},"one_line_summary":"A benchmark across 156 comparisons finds classical ML models win 116 times while larger pretrained and LLM models win far fewer, showing predictive performance depends on model-task fit rather than scale.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"Classical ML models outperform larger pretrained and LLM approaches in most molecular prediction tasks for drug discovery","strongest_claim":"Across 156 fold mean comparisons, classical ML such as RF(ECFP4) and ExtraTrees(RDKit) win 116, GNNs such as GIN and Ligandformer win 25, pretrained sequence models such as MoLFormer and ChemBERTa2 win 12, and LLM based SAR baselines win three. Compact specialized models remain highly effective for molecular property and activity prediction.","weakest_assumption":"The 78 endpoint and split entries, grouped into ADME, toxicity and bioactivity classes and using random, Murcko scaffold, and structure-separated 5-fold CV, adequately represent the spectrum of real-world drug discovery challenges from closed-library retrospective evaluation to novel chemotype library expansion."}},"verdict_id":"bdc3f0c5-9c2b-48af-89c3-7196ab74ee04"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:dd76ddc2454663ddcf0385b75e4933b6ea2f1b79163822325caee28a250d4695","target":"record","created_at":"2026-05-20T00:00:39Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"68f33d041722283f2502d820cb08e271088a761ed6bb332dd7e8585e5f5012a8","cross_cats_sorted":["q-bio.QM"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-04-29T10:01:16Z","title_canon_sha256":"e9b2b6c7870d2c326f35efcd8a570b734b63e70cbab790c4ffccb8165d2683fb"},"schema_version":"1.0","source":{"id":"2604.26498","kind":"arxiv","version":2}},"canonical_sha256":"da5b0c2b52e23f6471e55821296f99a46da4ca73fb416bcd32bdc54cec0ed4c3","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"da5b0c2b52e23f6471e55821296f99a46da4ca73fb416bcd32bdc54cec0ed4c3","first_computed_at":"2026-05-20T00:00:39.586358Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-20T00:00:39.586358Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"n7RBwrjmbxBDciCSOzlWh8xqyyx57527yTJ3K7MZ16XuFWEvUEDRyDYVc9vzC0Xuzw00SsmPVEMqTomxb8wTCA==","signature_status":"signed_v1","signed_at":"2026-05-20T00:00:39.586971Z","signed_message":"canonical_sha256_bytes"},"source_id":"2604.26498","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:bef19ee3810aa64540500b195131f25903ca9ba8a959c1dde05d019b511053c7","sha256:dd76ddc2454663ddcf0385b75e4933b6ea2f1b79163822325caee28a250d4695","sha256:d4724885921290692c61c7b79e1851ec0bb25f5a9536c4a7dcff3ccaeaf41286"],"state_sha256":"afc2bf848f381e2d50c254d574a96b554e46ea243883ae2caff236441c1c9fc1"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"XKzFX+ycGbCj4lxuaj/TGlpG+g2y0LjfLVVa9uuTnH9ZjNu6o6FW7AoV4+I8iF78dvjJ7FCAyzU5aCFODoBKDg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-21T22:34:07.679515Z","bundle_sha256":"c7362714359694f51898d40385cb939839b1997867d2aa2574d000fb50092491"}}