{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:FGYWD572I72RQCO3NNT5QWKWRG","short_pith_number":"pith:FGYWD572","schema_version":"1.0","canonical_sha256":"29b161f7fa47f51809db6b67d85956898512346bf0561114f30c3a2a32e7921c","source":{"kind":"arxiv","id":"2603.08206","version":5},"attestation_state":"computed","paper":{"title":"Distributional Regression with Tabular Foundation Models: Evaluating Probabilistic Predictions via Proper Scoring Rules","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.LG","authors_text":"Jonas Landsgesell, Pascal Knoll, Tizian Wenzel","submitted_at":"2026-03-09T10:38:01Z","abstract_excerpt":"Modern tabular foundation models such as TabPFN and TabICL naturally produce full predictive distributions, while the benchmarks used to evaluate them (TabArena, TALENT, and others) still rely almost exclusively on point-estimate metrics (RMSE, $R^2$). This mismatch implicitly rewards machine learning models or pipelines that elicit a good conditional mean while ignoring the quality of the predictive distribution. We make the case for using proper scoring rules for training, fine-tuning, and benchmarking (ranking) of tabular foundation models. Although all strictly proper scoring rules are the"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2603.08206","kind":"arxiv","version":5},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-03-09T10:38:01Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"efb602113c13d16c84a35769bf8cececbb1ac96006181e93f68c65824da82816","abstract_canon_sha256":"9d5473b25de017680f6783eae883a7f2dd694c8241114f94b52c348a6fda6d54"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-23T03:13:55.432934Z","signature_b64":"TjnV3U2haaIOxW3yaJKM0FsdZ3DFYQMsiuc61TY2xvl3sL+U7unPvoZa5yL4jt4Z9+/y+U0FXXxYtY2ZSnjPBA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"29b161f7fa47f51809db6b67d85956898512346bf0561114f30c3a2a32e7921c","last_reissued_at":"2026-06-23T03:13:55.432471Z","signature_status":"signed_v1","first_computed_at":"2026-06-23T03:13:55.432471Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Distributional Regression with Tabular Foundation Models: Evaluating Probabilistic Predictions via Proper Scoring Rules","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.LG","authors_text":"Jonas Landsgesell, Pascal Knoll, Tizian Wenzel","submitted_at":"2026-03-09T10:38:01Z","abstract_excerpt":"Modern tabular foundation models such as TabPFN and TabICL naturally produce full predictive distributions, while the benchmarks used to evaluate them (TabArena, TALENT, and others) still rely almost exclusively on point-estimate metrics (RMSE, $R^2$). This mismatch implicitly rewards machine learning models or pipelines that elicit a good conditional mean while ignoring the quality of the predictive distribution. We make the case for using proper scoring rules for training, fine-tuning, and benchmarking (ranking) of tabular foundation models. Although all strictly proper scoring rules are the"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2603.08206","kind":"arxiv","version":5},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2603.08206/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2603.08206","created_at":"2026-06-23T03:13:55.432521+00:00"},{"alias_kind":"arxiv_version","alias_value":"2603.08206v5","created_at":"2026-06-23T03:13:55.432521+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2603.08206","created_at":"2026-06-23T03:13:55.432521+00:00"},{"alias_kind":"pith_short_12","alias_value":"FGYWD572I72R","created_at":"2026-06-23T03:13:55.432521+00:00"},{"alias_kind":"pith_short_16","alias_value":"FGYWD572I72RQCO3","created_at":"2026-06-23T03:13:55.432521+00:00"},{"alias_kind":"pith_short_8","alias_value":"FGYWD572","created_at":"2026-06-23T03:13:55.432521+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":1,"internal_anchor_count":1,"sample":[{"citing_arxiv_id":"2606.30410","citing_title":"Beyond IID: How General Are Tabular Foundation Models, Really?","ref_index":23,"is_internal_anchor":true}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/FGYWD572I72RQCO3NNT5QWKWRG","json":"https://pith.science/pith/FGYWD572I72RQCO3NNT5QWKWRG.json","graph_json":"https://pith.science/api/pith-number/FGYWD572I72RQCO3NNT5QWKWRG/graph.json","events_json":"https://pith.science/api/pith-number/FGYWD572I72RQCO3NNT5QWKWRG/events.json","paper":"https://pith.science/paper/FGYWD572"},"agent_actions":{"view_html":"https://pith.science/pith/FGYWD572I72RQCO3NNT5QWKWRG","download_json":"https://pith.science/pith/FGYWD572I72RQCO3NNT5QWKWRG.json","view_paper":"https://pith.science/paper/FGYWD572","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2603.08206&json=true","fetch_graph":"https://pith.science/api/pith-number/FGYWD572I72RQCO3NNT5QWKWRG/graph.json","fetch_events":"https://pith.science/api/pith-number/FGYWD572I72RQCO3NNT5QWKWRG/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/FGYWD572I72RQCO3NNT5QWKWRG/action/timestamp_anchor","attest_storage":"https://pith.science/pith/FGYWD572I72RQCO3NNT5QWKWRG/action/storage_attestation","attest_author":"https://pith.science/pith/FGYWD572I72RQCO3NNT5QWKWRG/action/author_attestation","sign_citation":"https://pith.science/pith/FGYWD572I72RQCO3NNT5QWKWRG/action/citation_signature","submit_replication":"https://pith.science/pith/FGYWD572I72RQCO3NNT5QWKWRG/action/replication_record"}},"created_at":"2026-06-23T03:13:55.432521+00:00","updated_at":"2026-06-23T03:13:55.432521+00:00"}