{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2017:67SSZHDSV4XA7PFWYT3MTTLJRP","short_pith_number":"pith:67SSZHDS","schema_version":"1.0","canonical_sha256":"f7e52c9c72af2e0fbcb6c4f6c9cd698becb0547b97945d3063b4d7f2a5326e7e","source":{"kind":"arxiv","id":"1707.01623","version":2},"attestation_state":"computed","paper":{"title":"RIDDLE: Race and ethnicity Imputation from Disease history with Deep LEarning","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.LG","cs.NE"],"primary_cat":"q-bio.QM","authors_text":"Andrey Rzhetsky, Ji-Sung Kim, Xin Gao","submitted_at":"2017-07-06T03:03:57Z","abstract_excerpt":"Anonymized electronic medical records are an increasingly popular source of research data. However, these datasets often lack race and ethnicity information. This creates problems for researchers modeling human disease, as race and ethnicity are powerful confounders for many health exposures and treatment outcomes; race and ethnicity are closely linked to population-specific genetic variation. We showed that deep neural networks generate more accurate estimates for missing racial and ethnic information than competing methods (e.g., logistic regression, random forest). RIDDLE yielded significan"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1707.01623","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"q-bio.QM","submitted_at":"2017-07-06T03:03:57Z","cross_cats_sorted":["cs.LG","cs.NE"],"title_canon_sha256":"6d28963f3796d1ccdfe6f5f50efae0adf671e299d2cd99c6e9efbb2a8583a5a3","abstract_canon_sha256":"c9b7fbf340b462c3e51fd1f48990a6c31b79b0a2a7b92ade371df24667098ed4"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:17:19.293681Z","signature_b64":"3ZpND247GenZZ2FpE4e1xcJvTcEDqYskN3HqPijoTzR0L5e1zVdciVxsMA1fAT51oeDLt3fN5pE5yfPEuOuSDg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"f7e52c9c72af2e0fbcb6c4f6c9cd698becb0547b97945d3063b4d7f2a5326e7e","last_reissued_at":"2026-05-18T00:17:19.293174Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:17:19.293174Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"RIDDLE: Race and ethnicity Imputation from Disease history with Deep LEarning","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.LG","cs.NE"],"primary_cat":"q-bio.QM","authors_text":"Andrey Rzhetsky, Ji-Sung Kim, Xin Gao","submitted_at":"2017-07-06T03:03:57Z","abstract_excerpt":"Anonymized electronic medical records are an increasingly popular source of research data. However, these datasets often lack race and ethnicity information. This creates problems for researchers modeling human disease, as race and ethnicity are powerful confounders for many health exposures and treatment outcomes; race and ethnicity are closely linked to population-specific genetic variation. We showed that deep neural networks generate more accurate estimates for missing racial and ethnic information than competing methods (e.g., logistic regression, random forest). RIDDLE yielded significan"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1707.01623","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1707.01623","created_at":"2026-05-18T00:17:19.293252+00:00"},{"alias_kind":"arxiv_version","alias_value":"1707.01623v2","created_at":"2026-05-18T00:17:19.293252+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1707.01623","created_at":"2026-05-18T00:17:19.293252+00:00"},{"alias_kind":"pith_short_12","alias_value":"67SSZHDSV4XA","created_at":"2026-05-18T12:31:03.183658+00:00"},{"alias_kind":"pith_short_16","alias_value":"67SSZHDSV4XA7PFW","created_at":"2026-05-18T12:31:03.183658+00:00"},{"alias_kind":"pith_short_8","alias_value":"67SSZHDS","created_at":"2026-05-18T12:31:03.183658+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/67SSZHDSV4XA7PFWYT3MTTLJRP","json":"https://pith.science/pith/67SSZHDSV4XA7PFWYT3MTTLJRP.json","graph_json":"https://pith.science/api/pith-number/67SSZHDSV4XA7PFWYT3MTTLJRP/graph.json","events_json":"https://pith.science/api/pith-number/67SSZHDSV4XA7PFWYT3MTTLJRP/events.json","paper":"https://pith.science/paper/67SSZHDS"},"agent_actions":{"view_html":"https://pith.science/pith/67SSZHDSV4XA7PFWYT3MTTLJRP","download_json":"https://pith.science/pith/67SSZHDSV4XA7PFWYT3MTTLJRP.json","view_paper":"https://pith.science/paper/67SSZHDS","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1707.01623&json=true","fetch_graph":"https://pith.science/api/pith-number/67SSZHDSV4XA7PFWYT3MTTLJRP/graph.json","fetch_events":"https://pith.science/api/pith-number/67SSZHDSV4XA7PFWYT3MTTLJRP/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/67SSZHDSV4XA7PFWYT3MTTLJRP/action/timestamp_anchor","attest_storage":"https://pith.science/pith/67SSZHDSV4XA7PFWYT3MTTLJRP/action/storage_attestation","attest_author":"https://pith.science/pith/67SSZHDSV4XA7PFWYT3MTTLJRP/action/author_attestation","sign_citation":"https://pith.science/pith/67SSZHDSV4XA7PFWYT3MTTLJRP/action/citation_signature","submit_replication":"https://pith.science/pith/67SSZHDSV4XA7PFWYT3MTTLJRP/action/replication_record"}},"created_at":"2026-05-18T00:17:19.293252+00:00","updated_at":"2026-05-18T00:17:19.293252+00:00"}