{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2025:OHUAB6VQYTEWQDZOPREF4K5DXU","short_pith_number":"pith:OHUAB6VQ","schema_version":"1.0","canonical_sha256":"71e800fab0c4c9680f2e7c485e2ba3bd22fafed837448c48385032d6a4a5bda2","source":{"kind":"arxiv","id":"2506.16791","version":4},"attestation_state":"computed","paper":{"title":"TabArena: A Living Benchmark for Machine Learning on Tabular Data","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.LG","authors_text":"Andrej Tschalzev, David Holzm\\\"uller, David Salinas, Frank Hutter, Lennart Purucker, Nick Erickson, Prateek Mutalik Desai","submitted_at":"2025-06-20T07:14:48Z","abstract_excerpt":"With the growing popularity of deep learning and foundation models for tabular data, the need for standardized and reliable benchmarks is higher than ever. However, current benchmarks are static. Their design is not updated even if flaws are discovered, model versions are updated, or new models are released. To address this, we introduce TabArena, the first continuously maintained living tabular benchmarking system. To launch TabArena, we manually curate a representative collection of datasets and well-implemented models, conduct a large-scale benchmarking study to initialize a public leaderbo"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2506.16791","kind":"arxiv","version":4},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2025-06-20T07:14:48Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"97db80b81ba73281a8296eaa21103290e7d29c959d0fec46e4c1fa05fbdd60b0","abstract_canon_sha256":"e0a27106a70ed626301e57cce204fd71473b21a56a6c11d3d03c9ddbc3c0c833"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:38:13.543490Z","signature_b64":"y9TArrsFIxZM3fkR34BBmQ3BiaZ1lp/IMez2c9bK6XqUXVH6PDrfReUmYCs4Vs4IZMeAOTP9BwhLFnBZBlGaCg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"71e800fab0c4c9680f2e7c485e2ba3bd22fafed837448c48385032d6a4a5bda2","last_reissued_at":"2026-05-17T23:38:13.542966Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:38:13.542966Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"TabArena: A Living Benchmark for Machine Learning on Tabular Data","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.LG","authors_text":"Andrej Tschalzev, David Holzm\\\"uller, David Salinas, Frank Hutter, Lennart Purucker, Nick Erickson, Prateek Mutalik Desai","submitted_at":"2025-06-20T07:14:48Z","abstract_excerpt":"With the growing popularity of deep learning and foundation models for tabular data, the need for standardized and reliable benchmarks is higher than ever. However, current benchmarks are static. Their design is not updated even if flaws are discovered, model versions are updated, or new models are released. To address this, we introduce TabArena, the first continuously maintained living tabular benchmarking system. To launch TabArena, we manually curate a representative collection of datasets and well-implemented models, conduct a large-scale benchmarking study to initialize a public leaderbo"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2506.16791","kind":"arxiv","version":4},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2506.16791","created_at":"2026-05-17T23:38:13.543039+00:00"},{"alias_kind":"arxiv_version","alias_value":"2506.16791v4","created_at":"2026-05-17T23:38:13.543039+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2506.16791","created_at":"2026-05-17T23:38:13.543039+00:00"},{"alias_kind":"pith_short_12","alias_value":"OHUAB6VQYTEW","created_at":"2026-05-18T12:33:37.589309+00:00"},{"alias_kind":"pith_short_16","alias_value":"OHUAB6VQYTEWQDZO","created_at":"2026-05-18T12:33:37.589309+00:00"},{"alias_kind":"pith_short_8","alias_value":"OHUAB6VQ","created_at":"2026-05-18T12:33:37.589309+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":19,"internal_anchor_count":1,"sample":[{"citing_arxiv_id":"2508.10053","citing_title":"xRFM: Accurate, scalable, and interpretable feature learning models for tabular data","ref_index":14,"is_internal_anchor":true},{"citing_arxiv_id":"2512.20761","citing_title":"TS-Arena -- A Live Forecast Pre-Registration Platform","ref_index":10,"is_internal_anchor":false},{"citing_arxiv_id":"2602.09329","citing_title":"MacrOData: New Benchmarks of Thousands of Datasets for Tabular Outlier Detection","ref_index":22,"is_internal_anchor":false},{"citing_arxiv_id":"2605.13986","citing_title":"TabPFN-3: Technical Report","ref_index":1,"is_internal_anchor":false},{"citing_arxiv_id":"2511.08667","citing_title":"TabPFN-2.5: Advancing the State of the Art in Tabular Foundation Models","ref_index":1,"is_internal_anchor":false},{"citing_arxiv_id":"2605.06047","citing_title":"TFM-Retouche: A Lightweight Input-Space Adapter for Tabular Foundation Models","ref_index":8,"is_internal_anchor":false},{"citing_arxiv_id":"2605.06117","citing_title":"BoostLLM: Boosting-inspired LLM Fine-tuning for Few-shot Tabular Classification","ref_index":9,"is_internal_anchor":false},{"citing_arxiv_id":"2605.10616","citing_title":"MulTaBench: Benchmarking Multimodal Tabular Learning with Text and Image","ref_index":23,"is_internal_anchor":false},{"citing_arxiv_id":"2605.03808","citing_title":"Agentic-imodels: Evolving agentic interpretability tools via autoresearch","ref_index":67,"is_internal_anchor":false},{"citing_arxiv_id":"2605.05993","citing_title":"TabCF: Distributional Control Function Estimation with Tabular Foundation Models","ref_index":12,"is_internal_anchor":false},{"citing_arxiv_id":"2605.06117","citing_title":"BoostLLM: Boosting-inspired LLM Fine-tuning for Few-shot Tabular Classification","ref_index":9,"is_internal_anchor":false},{"citing_arxiv_id":"2605.06047","citing_title":"TFM-Retouche: A Lightweight Input-Space Adapter for Tabular Foundation Models","ref_index":7,"is_internal_anchor":false},{"citing_arxiv_id":"2605.02003","citing_title":"RamanBench: A Large-Scale Benchmark for Machine Learning on Raman Spectroscopy","ref_index":9,"is_internal_anchor":false},{"citing_arxiv_id":"2604.13332","citing_title":"Selecting Feature Interactions for Generalized Additive Models by Distilling Foundation Models","ref_index":7,"is_internal_anchor":false},{"citing_arxiv_id":"2604.06814","citing_title":"OmniTabBench: Mapping the Empirical Frontiers of GBDTs, Neural Networks, and Foundation Models for Tabular Data at Scale","ref_index":1,"is_internal_anchor":false},{"citing_arxiv_id":"2604.04868","citing_title":"Noise Immunity in In-Context Tabular Learning: An Empirical Robustness Analysis of TabPFN's Attention Mechanisms","ref_index":7,"is_internal_anchor":false},{"citing_arxiv_id":"2604.15297","citing_title":"Benchmarking Optimizers for MLPs in Tabular Deep Learning","ref_index":2,"is_internal_anchor":false},{"citing_arxiv_id":"2604.16123","citing_title":"Tabular foundation models for in-context prediction of molecular properties","ref_index":17,"is_internal_anchor":false},{"citing_arxiv_id":"2604.27351","citing_title":"Heterogeneous Scientific Foundation Model Collaboration","ref_index":47,"is_internal_anchor":false}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/OHUAB6VQYTEWQDZOPREF4K5DXU","json":"https://pith.science/pith/OHUAB6VQYTEWQDZOPREF4K5DXU.json","graph_json":"https://pith.science/api/pith-number/OHUAB6VQYTEWQDZOPREF4K5DXU/graph.json","events_json":"https://pith.science/api/pith-number/OHUAB6VQYTEWQDZOPREF4K5DXU/events.json","paper":"https://pith.science/paper/OHUAB6VQ"},"agent_actions":{"view_html":"https://pith.science/pith/OHUAB6VQYTEWQDZOPREF4K5DXU","download_json":"https://pith.science/pith/OHUAB6VQYTEWQDZOPREF4K5DXU.json","view_paper":"https://pith.science/paper/OHUAB6VQ","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2506.16791&json=true","fetch_graph":"https://pith.science/api/pith-number/OHUAB6VQYTEWQDZOPREF4K5DXU/graph.json","fetch_events":"https://pith.science/api/pith-number/OHUAB6VQYTEWQDZOPREF4K5DXU/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/OHUAB6VQYTEWQDZOPREF4K5DXU/action/timestamp_anchor","attest_storage":"https://pith.science/pith/OHUAB6VQYTEWQDZOPREF4K5DXU/action/storage_attestation","attest_author":"https://pith.science/pith/OHUAB6VQYTEWQDZOPREF4K5DXU/action/author_attestation","sign_citation":"https://pith.science/pith/OHUAB6VQYTEWQDZOPREF4K5DXU/action/citation_signature","submit_replication":"https://pith.science/pith/OHUAB6VQYTEWQDZOPREF4K5DXU/action/replication_record"}},"created_at":"2026-05-17T23:38:13.543039+00:00","updated_at":"2026-05-17T23:38:13.543039+00:00"}