{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2024:KJKPA7MF7WCQJYX7JXFEOJPGGJ","short_pith_number":"pith:KJKPA7MF","schema_version":"1.0","canonical_sha256":"5254f07d85fd8504e2ff4dca4725e63263a0843d197a1bb100ac935118c0b2f8","source":{"kind":"arxiv","id":"2411.06469","version":2},"attestation_state":"computed","paper":{"title":"ClinicalBench: Can LLMs Beat Traditional ML Models in Clinical Prediction?","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Canyu Chen, Che Liu, Danielle Bitterman, Fei Wang, Jian Yu, Kai Shu, Rui Zhang, Shan Chen, Shuang Zhou, Yuan Luo, Zhongwei Wan","submitted_at":"2024-11-10T14:07:43Z","abstract_excerpt":"Large Language Models (LLMs) hold great promise to revolutionize current clinical systems for their superior capacities on medical text processing tasks and medical licensing exams. Meanwhile, traditional ML models such as SVM and XGBoost have still been mainly adopted in clinical prediction tasks. An emerging question is: Can LLMs beat traditional ML models in clinical prediction? Thus, we build a new benchmark ClinicalBench to comprehensively study the clinical predictive modeling capacities of both general-purpose and medical LLMs, and compare them with traditional ML models. ClinicalBench "},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2411.06469","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2024-11-10T14:07:43Z","cross_cats_sorted":[],"title_canon_sha256":"ae0aeebd805d5ef92da8831ede0d29ab0dbf14589eaf2fe477cf380551b5e5ea","abstract_canon_sha256":"aa59183bfde68690cfc16377040dd467b39704f380bd96db815b2cbcffd33dba"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-09T02:07:00.985959Z","signature_b64":"6rJON04SvrTTDuSglvmel09SpycUp76pxnrnWa/4HBq65N37nrhxfln4iXMSZ+bQBhPS7sO2Lx/gY6X6RgcSCQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"5254f07d85fd8504e2ff4dca4725e63263a0843d197a1bb100ac935118c0b2f8","last_reissued_at":"2026-06-09T02:07:00.984867Z","signature_status":"signed_v1","first_computed_at":"2026-06-09T02:07:00.984867Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"ClinicalBench: Can LLMs Beat Traditional ML Models in Clinical Prediction?","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Canyu Chen, Che Liu, Danielle Bitterman, Fei Wang, Jian Yu, Kai Shu, Rui Zhang, Shan Chen, Shuang Zhou, Yuan Luo, Zhongwei Wan","submitted_at":"2024-11-10T14:07:43Z","abstract_excerpt":"Large Language Models (LLMs) hold great promise to revolutionize current clinical systems for their superior capacities on medical text processing tasks and medical licensing exams. Meanwhile, traditional ML models such as SVM and XGBoost have still been mainly adopted in clinical prediction tasks. An emerging question is: Can LLMs beat traditional ML models in clinical prediction? Thus, we build a new benchmark ClinicalBench to comprehensively study the clinical predictive modeling capacities of both general-purpose and medical LLMs, and compare them with traditional ML models. ClinicalBench "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2411.06469","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2411.06469/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2411.06469","created_at":"2026-06-09T02:07:00.985008+00:00"},{"alias_kind":"arxiv_version","alias_value":"2411.06469v2","created_at":"2026-06-09T02:07:00.985008+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2411.06469","created_at":"2026-06-09T02:07:00.985008+00:00"},{"alias_kind":"pith_short_12","alias_value":"KJKPA7MF7WCQ","created_at":"2026-06-09T02:07:00.985008+00:00"},{"alias_kind":"pith_short_16","alias_value":"KJKPA7MF7WCQJYX7","created_at":"2026-06-09T02:07:00.985008+00:00"},{"alias_kind":"pith_short_8","alias_value":"KJKPA7MF","created_at":"2026-06-09T02:07:00.985008+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":1,"internal_anchor_count":1,"sample":[{"citing_arxiv_id":"2605.11774","citing_title":"From Token to Token Pair: Efficient Prompt Compression for Large Language Models in Clinical Prediction","ref_index":4,"is_internal_anchor":true}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/KJKPA7MF7WCQJYX7JXFEOJPGGJ","json":"https://pith.science/pith/KJKPA7MF7WCQJYX7JXFEOJPGGJ.json","graph_json":"https://pith.science/api/pith-number/KJKPA7MF7WCQJYX7JXFEOJPGGJ/graph.json","events_json":"https://pith.science/api/pith-number/KJKPA7MF7WCQJYX7JXFEOJPGGJ/events.json","paper":"https://pith.science/paper/KJKPA7MF"},"agent_actions":{"view_html":"https://pith.science/pith/KJKPA7MF7WCQJYX7JXFEOJPGGJ","download_json":"https://pith.science/pith/KJKPA7MF7WCQJYX7JXFEOJPGGJ.json","view_paper":"https://pith.science/paper/KJKPA7MF","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2411.06469&json=true","fetch_graph":"https://pith.science/api/pith-number/KJKPA7MF7WCQJYX7JXFEOJPGGJ/graph.json","fetch_events":"https://pith.science/api/pith-number/KJKPA7MF7WCQJYX7JXFEOJPGGJ/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/KJKPA7MF7WCQJYX7JXFEOJPGGJ/action/timestamp_anchor","attest_storage":"https://pith.science/pith/KJKPA7MF7WCQJYX7JXFEOJPGGJ/action/storage_attestation","attest_author":"https://pith.science/pith/KJKPA7MF7WCQJYX7JXFEOJPGGJ/action/author_attestation","sign_citation":"https://pith.science/pith/KJKPA7MF7WCQJYX7JXFEOJPGGJ/action/citation_signature","submit_replication":"https://pith.science/pith/KJKPA7MF7WCQJYX7JXFEOJPGGJ/action/replication_record"}},"created_at":"2026-06-09T02:07:00.985008+00:00","updated_at":"2026-06-09T02:07:00.985008+00:00"}