{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:TKCNASRL6TMXQGJIVX4ANK5XKB","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"74419dce8761055ac0e98c951857ad64c645e238b715ea12279893958b1ef41b","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-06-01T15:33:43Z","title_canon_sha256":"8ad782833ecf070daf20763d69489c30ec95f45c0474ae78a805f78b7fc8dfaa"},"schema_version":"1.0","source":{"id":"2606.02384","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2606.02384","created_at":"2026-06-02T03:04:57Z"},{"alias_kind":"arxiv_version","alias_value":"2606.02384v1","created_at":"2026-06-02T03:04:57Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.02384","created_at":"2026-06-02T03:04:57Z"},{"alias_kind":"pith_short_12","alias_value":"TKCNASRL6TMX","created_at":"2026-06-02T03:04:57Z"},{"alias_kind":"pith_short_16","alias_value":"TKCNASRL6TMXQGJI","created_at":"2026-06-02T03:04:57Z"},{"alias_kind":"pith_short_8","alias_value":"TKCNASRL","created_at":"2026-06-02T03:04:57Z"}],"graph_snapshots":[{"event_id":"sha256:ad748cc9e786385add1370404afb020f2c6581fe0ec0adca80093866228f2f24","target":"graph","created_at":"2026-06-02T03:04:57Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2606.02384/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Progress in tabular machine learning has largely focused on increasingly sophisticated model architectures. At the same time, feature engineering remains a critical yet underexplored component of real-world modeling pipelines that is entirely absent from modern benchmarks, which creates an unquantified evaluation gap. In this work, we introduce TabPrep, a lightweight preprocessing pipeline composed of feature generators that are carefully designed to target three specific structural data patterns. We show that many widely used model classes exhibit predictable blind spots to these patterns and","authors_text":"Andrej Tschalzev, Christian Bartelt, Heiner Stuckenschmidt, Huzefa Rangwala, Nick Erickson, Stefan L\\\"udtke, Yuyang Wang","cross_cats":[],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-06-01T15:33:43Z","title":"TabPrep: Closing the Feature Engineering Gap in Tabular Benchmarks"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.02384","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:5ebc904b527bbf9158706990f60efcc84d261f7bef9496b585fc50f9cbae8a00","target":"record","created_at":"2026-06-02T03:04:57Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"74419dce8761055ac0e98c951857ad64c645e238b715ea12279893958b1ef41b","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-06-01T15:33:43Z","title_canon_sha256":"8ad782833ecf070daf20763d69489c30ec95f45c0474ae78a805f78b7fc8dfaa"},"schema_version":"1.0","source":{"id":"2606.02384","kind":"arxiv","version":1}},"canonical_sha256":"9a84d04a2bf4d9781928adf806abb75077f6c9d3355830d2c2b3af205e317549","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"9a84d04a2bf4d9781928adf806abb75077f6c9d3355830d2c2b3af205e317549","first_computed_at":"2026-06-02T03:04:57.754037Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-06-02T03:04:57.754037Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"QltCunH0UbL6n7PGS63y2C74P7Mv0nFQSt9yxNWucbAU0e7t9lwbYJj8r/rEgttKqlBTiE3pCFOYd+5ub/rjAA==","signature_status":"signed_v1","signed_at":"2026-06-02T03:04:57.754437Z","signed_message":"canonical_sha256_bytes"},"source_id":"2606.02384","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:5ebc904b527bbf9158706990f60efcc84d261f7bef9496b585fc50f9cbae8a00","sha256:ad748cc9e786385add1370404afb020f2c6581fe0ec0adca80093866228f2f24"],"state_sha256":"31bb5adb4c86525eff973422cd73e85a726f70354d8b2ce13388f35e24da6993"}