{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2020:234KR6DCQKCXTM72EWFGAAXG5V","short_pith_number":"pith:234KR6DC","schema_version":"1.0","canonical_sha256":"d6f8a8f862828579b3fa258a6002e6ed6ab6c0eac508138209d7c435e53f440a","source":{"kind":"arxiv","id":"2012.06678","version":1},"attestation_state":"computed","paper":{"title":"TabTransformer: Tabular Data Modeling Using Contextual Embeddings","license":"http://creativecommons.org/publicdomain/zero/1.0/","headline":"TabTransformer applies self-attention to categorical feature embeddings to create contextual representations that raise prediction accuracy on tabular data.","cross_cats":["cs.AI"],"primary_cat":"cs.LG","authors_text":"Ashish Khetan, Milan Cvitkovic, Xin Huang, Zohar Karnin","submitted_at":"2020-12-11T23:31:23Z","abstract_excerpt":"We propose TabTransformer, a novel deep tabular data modeling architecture for supervised and semi-supervised learning. The TabTransformer is built upon self-attention based Transformers. The Transformer layers transform the embeddings of categorical features into robust contextual embeddings to achieve higher prediction accuracy. Through extensive experiments on fifteen publicly available datasets, we show that the TabTransformer outperforms the state-of-the-art deep learning methods for tabular data by at least 1.0% on mean AUC, and matches the performance of tree-based ensemble models. Furt"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":true,"formal_links_present":true},"canonical_record":{"source":{"id":"2012.06678","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/publicdomain/zero/1.0/","primary_cat":"cs.LG","submitted_at":"2020-12-11T23:31:23Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"da775df861a4305faae84048f4463e76aecd6af3620932a75a861ab369a98ff6","abstract_canon_sha256":"14a98b2ef24421bfa593b9c24470b298a492a660bbbab01d932a8b622d357324"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:38:46.507257Z","signature_b64":"Gng8JZn3OYRs91zT2bO6cHnJuU9+FuaIWhOnpTknOKkMBhk3QSatB1DgVvkg++9iJs9kN1y/DEkvTYmwp0VICQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"d6f8a8f862828579b3fa258a6002e6ed6ab6c0eac508138209d7c435e53f440a","last_reissued_at":"2026-05-17T23:38:46.506714Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:38:46.506714Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"TabTransformer: Tabular Data Modeling Using Contextual Embeddings","license":"http://creativecommons.org/publicdomain/zero/1.0/","headline":"TabTransformer applies self-attention to categorical feature embeddings to create contextual representations that raise prediction accuracy on tabular data.","cross_cats":["cs.AI"],"primary_cat":"cs.LG","authors_text":"Ashish Khetan, Milan Cvitkovic, Xin Huang, Zohar Karnin","submitted_at":"2020-12-11T23:31:23Z","abstract_excerpt":"We propose TabTransformer, a novel deep tabular data modeling architecture for supervised and semi-supervised learning. The TabTransformer is built upon self-attention based Transformers. The Transformer layers transform the embeddings of categorical features into robust contextual embeddings to achieve higher prediction accuracy. Through extensive experiments on fifteen publicly available datasets, we show that the TabTransformer outperforms the state-of-the-art deep learning methods for tabular data by at least 1.0% on mean AUC, and matches the performance of tree-based ensemble models. Furt"},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"Through extensive experiments on fifteen publicly available datasets, we show that the TabTransformer outperforms the state-of-the-art deep learning methods for tabular data by at least 1.0% on mean AUC, and matches the performance of tree-based ensemble models.","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"The fifteen public datasets are representative of real-world tabular distributions and that baseline deep learning and tree methods were tuned to their best possible performance without hidden advantages for the proposed model.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"TabTransformer uses Transformer self-attention to generate contextual embeddings from categorical features in tabular data, outperforming prior deep learning methods by at least 1% mean AUC and matching tree-based ensembles on 15 public datasets while showing robustness to missing and noisy features","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"TabTransformer applies self-attention to categorical feature embeddings to create contextual representations that raise prediction accuracy on tabular data.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"23f5ef713f762cb055b8cadb7a1eddab47051dc174795f4dd522c783a26dbbbf"},"source":{"id":"2012.06678","kind":"arxiv","version":1},"verdict":{"id":"81d9d466-28f4-4742-8bc7-4d39225a178e","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-16T21:28:50.990411Z","strongest_claim":"Through extensive experiments on fifteen publicly available datasets, we show that the TabTransformer outperforms the state-of-the-art deep learning methods for tabular data by at least 1.0% on mean AUC, and matches the performance of tree-based ensemble models.","one_line_summary":"TabTransformer uses Transformer self-attention to generate contextual embeddings from categorical features in tabular data, outperforming prior deep learning methods by at least 1% mean AUC and matching tree-based ensembles on 15 public datasets while showing robustness to missing and noisy features","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"The fifteen public datasets are representative of real-world tabular distributions and that baseline deep learning and tree methods were tuned to their best possible performance without hidden advantages for the proposed model.","pith_extraction_headline":"TabTransformer applies self-attention to categorical feature embeddings to create contextual representations that raise prediction accuracy on tabular data."},"references":{"count":99,"sample":[{"doi":"","year":null,"title":"Proceedings of the ninth international conference on Information and knowledge management , pages=","work_id":"2a654019-2a19-4a40-8ad3-d71bd6cd4b0d","ref_index":1,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":null,"title":"Advances in neural information processing systems , pages=","work_id":"de2dd579-e4e4-4930-ab73-1cac1aa46d25","ref_index":2,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2002,"title":"Learning from labeled and unlabeled data with label propagation , author=. 2002 , publisher=","work_id":"af0992c3-e96a-451e-bc3e-e4a141a7dc29","ref_index":3,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":null,"title":"Advances in neural information processing systems , pages=","work_id":"2679e2f3-f246-4196-aac3-0b15eed5727c","ref_index":4,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":null,"title":"Workshop on challenges in representation learning, ICML , volume=","work_id":"eb36ae7f-1e24-491c-aef6-39195c0a348c","ref_index":5,"cited_arxiv_id":"","is_internal_anchor":false}],"resolved_work":99,"snapshot_sha256":"287e376195552d6048286aa0be77469c57fb3405ac05d0c0912f26c7e9626284","internal_anchors":8},"formal_canon":{"evidence_count":1,"snapshot_sha256":"bdb99e4f0497c0418d2c49c114d6d0af1f40ecb6958b647b997902aea22e3a4d"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2012.06678","created_at":"2026-05-17T23:38:46.506814+00:00"},{"alias_kind":"arxiv_version","alias_value":"2012.06678v1","created_at":"2026-05-17T23:38:46.506814+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2012.06678","created_at":"2026-05-17T23:38:46.506814+00:00"},{"alias_kind":"pith_short_12","alias_value":"234KR6DCQKCX","created_at":"2026-05-18T12:33:33.725879+00:00"},{"alias_kind":"pith_short_16","alias_value":"234KR6DCQKCXTM72","created_at":"2026-05-18T12:33:33.725879+00:00"},{"alias_kind":"pith_short_8","alias_value":"234KR6DC","created_at":"2026-05-18T12:33:33.725879+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":35,"internal_anchor_count":35,"sample":[{"citing_arxiv_id":"2403.20208","citing_title":"Unlock the Potential of Large Language Models for Predictive Tabular Tasks in Data Science with Table-Specific Pretraining","ref_index":7,"is_internal_anchor":true},{"citing_arxiv_id":"2410.21169","citing_title":"Document Parsing Unveiled: Techniques, Challenges, and Prospects for Structured Information Extraction","ref_index":87,"is_internal_anchor":true},{"citing_arxiv_id":"2505.13518","citing_title":"Data Balancing Strategies: A Systematic Survey of Resampling and Augmentation Methods","ref_index":131,"is_internal_anchor":true},{"citing_arxiv_id":"2605.20234","citing_title":"TabPFN-MT: A Natively Multitask In-Context Learner for Tabular Data","ref_index":39,"is_internal_anchor":true},{"citing_arxiv_id":"2604.13392","citing_title":"ReSS: Learning Reasoning Models for Tabular Data Prediction via Symbolic Scaffold","ref_index":19,"is_internal_anchor":true},{"citing_arxiv_id":"2605.08146","citing_title":"VT-Bench: A Unified Benchmark for Visual-Tabular Multi-Modal Learning","ref_index":38,"is_internal_anchor":true},{"citing_arxiv_id":"2605.16099","citing_title":"Federated Imputation under Heterogeneous Feature Spaces","ref_index":10,"is_internal_anchor":true},{"citing_arxiv_id":"2502.05564","citing_title":"TabICL: A Tabular Foundation Model for In-Context Learning on Large Data","ref_index":239,"is_internal_anchor":true},{"citing_arxiv_id":"2502.05564","citing_title":"TabICL: A Tabular Foundation Model for In-Context Learning on Large Data","ref_index":240,"is_internal_anchor":true},{"citing_arxiv_id":"2605.18147","citing_title":"Foundation Models for Credit Risk Prediction: A Game Changer?","ref_index":157,"is_internal_anchor":true},{"citing_arxiv_id":"2605.19014","citing_title":"SAGA: A Sequence-Adaptive Generative Architecture for Multi-Horizon Probabilistic Forecasting with Adaptive Temporal Conformal Prediction","ref_index":10,"is_internal_anchor":true},{"citing_arxiv_id":"2605.19662","citing_title":"When Tabular Foundation Models Meet Strategic Tabular Data: A Prior Alignment Approach","ref_index":62,"is_internal_anchor":true},{"citing_arxiv_id":"2605.16085","citing_title":"Towards Foundation Models for Relational Databases with Language Models and Graph Neural Networks","ref_index":15,"is_internal_anchor":true},{"citing_arxiv_id":"2506.02978","citing_title":"On the Robustness of Tabular Foundation Models: Test-Time Attacks and In-Context Defenses","ref_index":17,"is_internal_anchor":true},{"citing_arxiv_id":"2510.12957","citing_title":"Reveal-to-Revise: Explainable Bias-Aware Generative Modeling with Multimodal Attention","ref_index":41,"is_internal_anchor":true},{"citing_arxiv_id":"2605.14406","citing_title":"GeoViSTA: Geospatial Vision-Tabular Transformer for Multimodal Environment Representation","ref_index":24,"is_internal_anchor":true},{"citing_arxiv_id":"2605.14464","citing_title":"From Schema to Signal: Retrieval-Augmented Modeling for Relational Data Analytics","ref_index":21,"is_internal_anchor":true},{"citing_arxiv_id":"2605.14467","citing_title":"Focused PU learning from imbalanced data","ref_index":62,"is_internal_anchor":true},{"citing_arxiv_id":"2605.11091","citing_title":"ASD-Bench: A Four-Axis Comprehensive Benchmark of AI Models for Autism Spectrum Disorder","ref_index":6,"is_internal_anchor":true},{"citing_arxiv_id":"2605.03652","citing_title":"AniMatrix: An Anime Video Generation Model that Thinks in Art, Not Physics","ref_index":40,"is_internal_anchor":true},{"citing_arxiv_id":"2605.03430","citing_title":"DynaTab: Dynamic Feature Ordering as Neural Rewiring for High-Dimensional Tabular Data","ref_index":42,"is_internal_anchor":true},{"citing_arxiv_id":"2604.27606","citing_title":"ZAYAN: Disentangled Contrastive Transformer for Tabular Remote Sensing Data","ref_index":28,"is_internal_anchor":true},{"citing_arxiv_id":"2605.08146","citing_title":"VT-Bench: A Unified Benchmark for Visual-Tabular Multi-Modal Learning","ref_index":38,"is_internal_anchor":true},{"citing_arxiv_id":"2605.03652","citing_title":"AniMatrix: An Anime Video Generation Model that Thinks in Art, Not Physics","ref_index":40,"is_internal_anchor":true},{"citing_arxiv_id":"2605.04323","citing_title":"LUCAS-MEGA: A Large-Scale Multimodal Dataset for Representation Learning in Soil-Environment Systems","ref_index":11,"is_internal_anchor":true}]},"formal_canon":{"evidence_count":1,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/234KR6DCQKCXTM72EWFGAAXG5V","json":"https://pith.science/pith/234KR6DCQKCXTM72EWFGAAXG5V.json","graph_json":"https://pith.science/api/pith-number/234KR6DCQKCXTM72EWFGAAXG5V/graph.json","events_json":"https://pith.science/api/pith-number/234KR6DCQKCXTM72EWFGAAXG5V/events.json","paper":"https://pith.science/paper/234KR6DC"},"agent_actions":{"view_html":"https://pith.science/pith/234KR6DCQKCXTM72EWFGAAXG5V","download_json":"https://pith.science/pith/234KR6DCQKCXTM72EWFGAAXG5V.json","view_paper":"https://pith.science/paper/234KR6DC","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2012.06678&json=true","fetch_graph":"https://pith.science/api/pith-number/234KR6DCQKCXTM72EWFGAAXG5V/graph.json","fetch_events":"https://pith.science/api/pith-number/234KR6DCQKCXTM72EWFGAAXG5V/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/234KR6DCQKCXTM72EWFGAAXG5V/action/timestamp_anchor","attest_storage":"https://pith.science/pith/234KR6DCQKCXTM72EWFGAAXG5V/action/storage_attestation","attest_author":"https://pith.science/pith/234KR6DCQKCXTM72EWFGAAXG5V/action/author_attestation","sign_citation":"https://pith.science/pith/234KR6DCQKCXTM72EWFGAAXG5V/action/citation_signature","submit_replication":"https://pith.science/pith/234KR6DCQKCXTM72EWFGAAXG5V/action/replication_record"}},"created_at":"2026-05-17T23:38:46.506814+00:00","updated_at":"2026-05-17T23:38:46.506814+00:00"}