{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2025:YF5SB6PXUR4YVIH2PMLLL76CEI","short_pith_number":"pith:YF5SB6PX","schema_version":"1.0","canonical_sha256":"c17b20f9f7a4798aa0fa7b16b5ffc222289fb7c1496212e20fa503c7529317a8","source":{"kind":"arxiv","id":"2502.05564","version":2},"attestation_state":"computed","paper":{"title":"TabICL: A Tabular Foundation Model for In-Context Learning on Large Data","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.LG","authors_text":"David Holzm\\\"uller, Ga\\\"el Varoquaux, Jingang Qu, Marine Le Morvan","submitted_at":"2025-02-08T13:25:04Z","abstract_excerpt":"The long-standing dominance of gradient-boosted decision trees on tabular data is currently challenged by tabular foundation models using In-Context Learning (ICL): setting the training data as context for the test data and predicting in a single forward pass without parameter updates. While TabPFNv2 foundation model excels on tables with up to 10K samples, its alternating column- and row-wise attentions make handling large training sets computationally prohibitive. So, can ICL be effectively scaled and deliver a benefit for larger tables? We introduce TabICL, a tabular foundation model for cl"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2502.05564","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2025-02-08T13:25:04Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"9f3c86a8b09784adadeb3766543ce88b62282a357696304d9a812d686552ff56","abstract_canon_sha256":"3001da2d8266ffd26f8b92abc8479332bafea035a8e29472c0503766a72752b6"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-20T13:27:29.568226Z","signature_b64":"nn8KilLqEVd85ZgBzmUdR8isdqQ4Jca63SF0HKo+b1VF+iyOwqvSG5p9/aGhVm02iB0Dyc07EP6x3PaCsNQMDg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"c17b20f9f7a4798aa0fa7b16b5ffc222289fb7c1496212e20fa503c7529317a8","last_reissued_at":"2026-05-20T13:27:29.564638Z","signature_status":"signed_v1","first_computed_at":"2026-05-20T13:27:29.564638Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"TabICL: A Tabular Foundation Model for In-Context Learning on Large Data","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.LG","authors_text":"David Holzm\\\"uller, Ga\\\"el Varoquaux, Jingang Qu, Marine Le Morvan","submitted_at":"2025-02-08T13:25:04Z","abstract_excerpt":"The long-standing dominance of gradient-boosted decision trees on tabular data is currently challenged by tabular foundation models using In-Context Learning (ICL): setting the training data as context for the test data and predicting in a single forward pass without parameter updates. While TabPFNv2 foundation model excels on tables with up to 10K samples, its alternating column- and row-wise attentions make handling large training sets computationally prohibitive. So, can ICL be effectively scaled and deliver a benefit for larger tables? We introduce TabICL, a tabular foundation model for cl"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2502.05564","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2502.05564/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2502.05564","created_at":"2026-05-20T13:27:29.564760+00:00"},{"alias_kind":"arxiv_version","alias_value":"2502.05564v2","created_at":"2026-05-20T13:27:29.564760+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2502.05564","created_at":"2026-05-20T13:27:29.564760+00:00"},{"alias_kind":"pith_short_12","alias_value":"YF5SB6PXUR4Y","created_at":"2026-05-20T13:27:29.564760+00:00"},{"alias_kind":"pith_short_16","alias_value":"YF5SB6PXUR4YVIH2","created_at":"2026-05-20T13:27:29.564760+00:00"},{"alias_kind":"pith_short_8","alias_value":"YF5SB6PX","created_at":"2026-05-20T13:27:29.564760+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":22,"internal_anchor_count":22,"sample":[{"citing_arxiv_id":"2603.16513","citing_title":"FEAT: A Linear-Complexity Foundation Model for Extremely Large Structured Data","ref_index":6,"is_internal_anchor":true},{"citing_arxiv_id":"2605.18147","citing_title":"Foundation Models for Credit Risk Prediction: A Game Changer?","ref_index":117,"is_internal_anchor":true},{"citing_arxiv_id":"2605.18383","citing_title":"TabH2O: A Unified Foundation Model for Tabular Prediction","ref_index":17,"is_internal_anchor":true},{"citing_arxiv_id":"2605.18979","citing_title":"TabQL: In-Context Q-Learning with Tabular Foundation Models","ref_index":25,"is_internal_anchor":true},{"citing_arxiv_id":"2605.18635","citing_title":"Data Presentation Over Architecture: Resampling Strategies for Credit Risk Prediction with Tabular Foundation Models","ref_index":3,"is_internal_anchor":true},{"citing_arxiv_id":"2605.19662","citing_title":"When Tabular Foundation Models Meet Strategic Tabular Data: A Prior Alignment Approach","ref_index":86,"is_internal_anchor":true},{"citing_arxiv_id":"2506.02978","citing_title":"On the Robustness of Tabular Foundation Models: Test-Time Attacks and In-Context Defenses","ref_index":26,"is_internal_anchor":true},{"citing_arxiv_id":"2506.16791","citing_title":"TabArena: A Living Benchmark for Machine Learning on Tabular Data","ref_index":22,"is_internal_anchor":true},{"citing_arxiv_id":"2508.10053","citing_title":"xRFM: Accurate, scalable, and interpretable feature learning models for tabular data","ref_index":29,"is_internal_anchor":true},{"citing_arxiv_id":"2603.25777","citing_title":"Challenges and opportunities for AI to help deliver fusion energy","ref_index":27,"is_internal_anchor":true},{"citing_arxiv_id":"2605.12904","citing_title":"VIP-COP: Context Optimization for Tabular Foundation Models","ref_index":29,"is_internal_anchor":true},{"citing_arxiv_id":"2605.06047","citing_title":"TFM-Retouche: A Lightweight Input-Space Adapter for Tabular Foundation Models","ref_index":24,"is_internal_anchor":true},{"citing_arxiv_id":"2605.10616","citing_title":"MulTaBench: Benchmarking Multimodal Tabular Learning with Text and Image","ref_index":82,"is_internal_anchor":true},{"citing_arxiv_id":"2605.10230","citing_title":"FORGE: Fragment-Oriented Ranking and Generation for Context-Aware Molecular Optimization","ref_index":29,"is_internal_anchor":true},{"citing_arxiv_id":"2604.25154","citing_title":"Prior-Aligned Data Cleaning for Tabular Foundation Models","ref_index":27,"is_internal_anchor":true},{"citing_arxiv_id":"2605.06413","citing_title":"Decoupled PFNs: Identifiable Epistemic-Aleatoric Decomposition via Structured Synthetic Priors","ref_index":12,"is_internal_anchor":true},{"citing_arxiv_id":"2605.06047","citing_title":"TFM-Retouche: A Lightweight Input-Space Adapter for Tabular Foundation Models","ref_index":23,"is_internal_anchor":true},{"citing_arxiv_id":"2604.13332","citing_title":"Selecting Feature Interactions for Generalized Additive Models by Distilling Foundation Models","ref_index":17,"is_internal_anchor":true},{"citing_arxiv_id":"2604.05635","citing_title":"From Uniform to Learned Knots: A Study of Spline-Based Numerical Encodings for Tabular Deep Learning","ref_index":20,"is_internal_anchor":true},{"citing_arxiv_id":"2604.16123","citing_title":"Tabular foundation models for in-context prediction of molecular properties","ref_index":16,"is_internal_anchor":true},{"citing_arxiv_id":"2605.04911","citing_title":"Breaking the Quality-Privacy Tradeoff in Tabular Data Generation via In-Context Learning","ref_index":14,"is_internal_anchor":true},{"citing_arxiv_id":"2605.02519","citing_title":"Evaluating Tabular Representation Learning for Network Intrusion Detection","ref_index":7,"is_internal_anchor":true}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/YF5SB6PXUR4YVIH2PMLLL76CEI","json":"https://pith.science/pith/YF5SB6PXUR4YVIH2PMLLL76CEI.json","graph_json":"https://pith.science/api/pith-number/YF5SB6PXUR4YVIH2PMLLL76CEI/graph.json","events_json":"https://pith.science/api/pith-number/YF5SB6PXUR4YVIH2PMLLL76CEI/events.json","paper":"https://pith.science/paper/YF5SB6PX"},"agent_actions":{"view_html":"https://pith.science/pith/YF5SB6PXUR4YVIH2PMLLL76CEI","download_json":"https://pith.science/pith/YF5SB6PXUR4YVIH2PMLLL76CEI.json","view_paper":"https://pith.science/paper/YF5SB6PX","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2502.05564&json=true","fetch_graph":"https://pith.science/api/pith-number/YF5SB6PXUR4YVIH2PMLLL76CEI/graph.json","fetch_events":"https://pith.science/api/pith-number/YF5SB6PXUR4YVIH2PMLLL76CEI/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/YF5SB6PXUR4YVIH2PMLLL76CEI/action/timestamp_anchor","attest_storage":"https://pith.science/pith/YF5SB6PXUR4YVIH2PMLLL76CEI/action/storage_attestation","attest_author":"https://pith.science/pith/YF5SB6PXUR4YVIH2PMLLL76CEI/action/author_attestation","sign_citation":"https://pith.science/pith/YF5SB6PXUR4YVIH2PMLLL76CEI/action/citation_signature","submit_replication":"https://pith.science/pith/YF5SB6PXUR4YVIH2PMLLL76CEI/action/replication_record"}},"created_at":"2026-05-20T13:27:29.564760+00:00","updated_at":"2026-05-20T13:27:29.564760+00:00"}