{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:OICPKZU7OTL26FUZOGC37OFYSQ","short_pith_number":"pith:OICPKZU7","schema_version":"1.0","canonical_sha256":"7204f5669f74d7af16997185bfb8b89412af5e548331a1a419c257e62ce407ce","source":{"kind":"arxiv","id":"2603.12344","version":2},"attestation_state":"computed","paper":{"title":"Can Decision Trees Teach Large Language Models? Distilling Verbalized Knowledge for Molecular Property Prediction","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.LG","authors_text":"Hoang Thanh Lam, Khiem Le, Marcos Mart\\'inez Galindo, Nitesh V. Chawla, Sreejata Dey, Ting Hua, Vanessa Lopez","submitted_at":"2026-03-12T18:06:21Z","abstract_excerpt":"Molecular Property Prediction (MPP) is a fundamental problem in drug discovery that has recently attracted growing attention. Large Language Models (LLMs), known for their impressive proficiency across domains, show promise as generalist models for MPP. However, their current performance remains below the threshold needed for practical adoption. To bridge this gap, we propose TreeKD for distilling the knowledge of tree-based specialist models into LLMs to complement the internal knowledge of LLMs and improve their predictive accuracy. For each property, we train a specialist decision tree usin"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2603.12344","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-03-12T18:06:21Z","cross_cats_sorted":[],"title_canon_sha256":"5a8347edcdea6ebfae52b1fd1167d88d5439f02b5c090b6012bb522727e736ae","abstract_canon_sha256":"20f6468e24a805e255f73debca427cdd5d100f67098c06e80e6fe10bae2ff3ca"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-28T01:04:38.305109Z","signature_b64":"0TYAWsHRy92BfsKGO3DVGS29n3MX926T2QZTaNTE1BR091qpFuIOyW+LFhRhsoNqZArR/3XmmQ0uuskxPr31Bg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"7204f5669f74d7af16997185bfb8b89412af5e548331a1a419c257e62ce407ce","last_reissued_at":"2026-05-28T01:04:38.304655Z","signature_status":"signed_v1","first_computed_at":"2026-05-28T01:04:38.304655Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Can Decision Trees Teach Large Language Models? Distilling Verbalized Knowledge for Molecular Property Prediction","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.LG","authors_text":"Hoang Thanh Lam, Khiem Le, Marcos Mart\\'inez Galindo, Nitesh V. Chawla, Sreejata Dey, Ting Hua, Vanessa Lopez","submitted_at":"2026-03-12T18:06:21Z","abstract_excerpt":"Molecular Property Prediction (MPP) is a fundamental problem in drug discovery that has recently attracted growing attention. Large Language Models (LLMs), known for their impressive proficiency across domains, show promise as generalist models for MPP. However, their current performance remains below the threshold needed for practical adoption. To bridge this gap, we propose TreeKD for distilling the knowledge of tree-based specialist models into LLMs to complement the internal knowledge of LLMs and improve their predictive accuracy. For each property, we train a specialist decision tree usin"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2603.12344","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2603.12344/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2603.12344","created_at":"2026-05-28T01:04:38.304713+00:00"},{"alias_kind":"arxiv_version","alias_value":"2603.12344v2","created_at":"2026-05-28T01:04:38.304713+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2603.12344","created_at":"2026-05-28T01:04:38.304713+00:00"},{"alias_kind":"pith_short_12","alias_value":"OICPKZU7OTL2","created_at":"2026-05-28T01:04:38.304713+00:00"},{"alias_kind":"pith_short_16","alias_value":"OICPKZU7OTL26FUZ","created_at":"2026-05-28T01:04:38.304713+00:00"},{"alias_kind":"pith_short_8","alias_value":"OICPKZU7","created_at":"2026-05-28T01:04:38.304713+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/OICPKZU7OTL26FUZOGC37OFYSQ","json":"https://pith.science/pith/OICPKZU7OTL26FUZOGC37OFYSQ.json","graph_json":"https://pith.science/api/pith-number/OICPKZU7OTL26FUZOGC37OFYSQ/graph.json","events_json":"https://pith.science/api/pith-number/OICPKZU7OTL26FUZOGC37OFYSQ/events.json","paper":"https://pith.science/paper/OICPKZU7"},"agent_actions":{"view_html":"https://pith.science/pith/OICPKZU7OTL26FUZOGC37OFYSQ","download_json":"https://pith.science/pith/OICPKZU7OTL26FUZOGC37OFYSQ.json","view_paper":"https://pith.science/paper/OICPKZU7","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2603.12344&json=true","fetch_graph":"https://pith.science/api/pith-number/OICPKZU7OTL26FUZOGC37OFYSQ/graph.json","fetch_events":"https://pith.science/api/pith-number/OICPKZU7OTL26FUZOGC37OFYSQ/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/OICPKZU7OTL26FUZOGC37OFYSQ/action/timestamp_anchor","attest_storage":"https://pith.science/pith/OICPKZU7OTL26FUZOGC37OFYSQ/action/storage_attestation","attest_author":"https://pith.science/pith/OICPKZU7OTL26FUZOGC37OFYSQ/action/author_attestation","sign_citation":"https://pith.science/pith/OICPKZU7OTL26FUZOGC37OFYSQ/action/citation_signature","submit_replication":"https://pith.science/pith/OICPKZU7OTL26FUZOGC37OFYSQ/action/replication_record"}},"created_at":"2026-05-28T01:04:38.304713+00:00","updated_at":"2026-05-28T01:04:38.304713+00:00"}