{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2016:LRQMIUSHT3DEJAY4SSBYZVSWVV","short_pith_number":"pith:LRQMIUSH","schema_version":"1.0","canonical_sha256":"5c60c452479ec644831c94838cd656ad4e1f704644bba58689b522b7b7b0f4f3","source":{"kind":"arxiv","id":"1604.06737","version":1},"attestation_state":"computed","paper":{"title":"Entity Embeddings of Categorical Variables","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.LG","authors_text":"Cheng Guo, Felix Berkhahn","submitted_at":"2016-04-22T16:34:30Z","abstract_excerpt":"We map categorical variables in a function approximation problem into Euclidean spaces, which are the entity embeddings of the categorical variables. The mapping is learned by a neural network during the standard supervised training process. Entity embedding not only reduces memory usage and speeds up neural networks compared with one-hot encoding, but more importantly by mapping similar values close to each other in the embedding space it reveals the intrinsic properties of the categorical variables. We applied it successfully in a recent Kaggle competition and were able to reach the third po"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1604.06737","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2016-04-22T16:34:30Z","cross_cats_sorted":[],"title_canon_sha256":"a283193b775e748b5612714559aa38b042bd30ffa3b6719c30b0fc2b2c48e78b","abstract_canon_sha256":"665cffb3c16dfdf6c954077ea2a37b19461d62a17857d3371e05b8db8ee8bfc7"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T01:16:28.288325Z","signature_b64":"cHRtLeuD1KaC6hUXvv7HWWXp1knqT2/Ss/MXLDOXz5o1dlx6ykALKRiDaAJjCTkoUjbYJDV961AhTPUGjiX6DA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"5c60c452479ec644831c94838cd656ad4e1f704644bba58689b522b7b7b0f4f3","last_reissued_at":"2026-05-18T01:16:28.287701Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T01:16:28.287701Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Entity Embeddings of Categorical Variables","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.LG","authors_text":"Cheng Guo, Felix Berkhahn","submitted_at":"2016-04-22T16:34:30Z","abstract_excerpt":"We map categorical variables in a function approximation problem into Euclidean spaces, which are the entity embeddings of the categorical variables. The mapping is learned by a neural network during the standard supervised training process. Entity embedding not only reduces memory usage and speeds up neural networks compared with one-hot encoding, but more importantly by mapping similar values close to each other in the embedding space it reveals the intrinsic properties of the categorical variables. We applied it successfully in a recent Kaggle competition and were able to reach the third po"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1604.06737","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1604.06737","created_at":"2026-05-18T01:16:28.287793+00:00"},{"alias_kind":"arxiv_version","alias_value":"1604.06737v1","created_at":"2026-05-18T01:16:28.287793+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1604.06737","created_at":"2026-05-18T01:16:28.287793+00:00"},{"alias_kind":"pith_short_12","alias_value":"LRQMIUSHT3DE","created_at":"2026-05-18T12:30:29.479603+00:00"},{"alias_kind":"pith_short_16","alias_value":"LRQMIUSHT3DEJAY4","created_at":"2026-05-18T12:30:29.479603+00:00"},{"alias_kind":"pith_short_8","alias_value":"LRQMIUSH","created_at":"2026-05-18T12:30:29.479603+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":4,"internal_anchor_count":1,"sample":[{"citing_arxiv_id":"1907.01960","citing_title":"Fashion Retail: Forecasting Demand for New Items","ref_index":8,"is_internal_anchor":true},{"citing_arxiv_id":"2604.18966","citing_title":"Self-Improving Tabular Language Models via Iterative Reward-Guided Post-Training","ref_index":26,"is_internal_anchor":false},{"citing_arxiv_id":"2604.10337","citing_title":"Integrating SAINT with Tree-Based Models: A Case Study in Employee Attrition Prediction","ref_index":6,"is_internal_anchor":false},{"citing_arxiv_id":"2604.08076","citing_title":"$\\phi-$DeepONet: A Discontinuity Capturing Neural Operator","ref_index":38,"is_internal_anchor":false}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/LRQMIUSHT3DEJAY4SSBYZVSWVV","json":"https://pith.science/pith/LRQMIUSHT3DEJAY4SSBYZVSWVV.json","graph_json":"https://pith.science/api/pith-number/LRQMIUSHT3DEJAY4SSBYZVSWVV/graph.json","events_json":"https://pith.science/api/pith-number/LRQMIUSHT3DEJAY4SSBYZVSWVV/events.json","paper":"https://pith.science/paper/LRQMIUSH"},"agent_actions":{"view_html":"https://pith.science/pith/LRQMIUSHT3DEJAY4SSBYZVSWVV","download_json":"https://pith.science/pith/LRQMIUSHT3DEJAY4SSBYZVSWVV.json","view_paper":"https://pith.science/paper/LRQMIUSH","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1604.06737&json=true","fetch_graph":"https://pith.science/api/pith-number/LRQMIUSHT3DEJAY4SSBYZVSWVV/graph.json","fetch_events":"https://pith.science/api/pith-number/LRQMIUSHT3DEJAY4SSBYZVSWVV/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/LRQMIUSHT3DEJAY4SSBYZVSWVV/action/timestamp_anchor","attest_storage":"https://pith.science/pith/LRQMIUSHT3DEJAY4SSBYZVSWVV/action/storage_attestation","attest_author":"https://pith.science/pith/LRQMIUSHT3DEJAY4SSBYZVSWVV/action/author_attestation","sign_citation":"https://pith.science/pith/LRQMIUSHT3DEJAY4SSBYZVSWVV/action/citation_signature","submit_replication":"https://pith.science/pith/LRQMIUSHT3DEJAY4SSBYZVSWVV/action/replication_record"}},"created_at":"2026-05-18T01:16:28.287793+00:00","updated_at":"2026-05-18T01:16:28.287793+00:00"}