{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2019:KKEEPXKA53F2AB2OJ37JYFH4Q2","short_pith_number":"pith:KKEEPXKA","schema_version":"1.0","canonical_sha256":"528847dd40eecba0074e4efe9c14fc86a3efb1f1c453b83de579a1a79a534e6d","source":{"kind":"arxiv","id":"1905.10309","version":1},"attestation_state":"computed","paper":{"title":"Unsupervised Machine Learning for the Discovery of Latent Disease Clusters and Patient Subgroups Using Electronic Health Records","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","cs.IR"],"primary_cat":"stat.AP","authors_text":"Ahmad P. Tafti, Andrew H. Limper, Elizabeth J. Atkinson, Hongfang Liu, Nan Zhang, Shreyasee Amin, Terry M. Therneau, Yanshan Wang, Yiqing Zhao","submitted_at":"2019-05-17T20:07:22Z","abstract_excerpt":"Machine learning has become ubiquitous and a key technology on mining electronic health records (EHRs) for facilitating clinical research and practice. Unsupervised machine learning, as opposed to supervised learning, has shown promise in identifying novel patterns and relations from EHRs without using human created labels. In this paper, we investigate the application of unsupervised machine learning models in discovering latent disease clusters and patient subgroups based on EHRs. We utilized Latent Dirichlet Allocation (LDA), a generative probabilistic model, and proposed a novel model name"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1905.10309","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"stat.AP","submitted_at":"2019-05-17T20:07:22Z","cross_cats_sorted":["cs.AI","cs.IR"],"title_canon_sha256":"6c6fbd23923c1b025f7f97bb616f28a4b5716ffffabfb1b59c00e2a543a1f2fd","abstract_canon_sha256":"28150c1d0f1df4a9a3179f5201299d833371e62dddef01d5bd6e977606e42c33"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:45:10.642789Z","signature_b64":"ujn4aeeD0rKAuYqNirnfawyel1zmxzZ0N/WdxDRzwyQCH+f2E2iGu+tSSUaiN7vr7bcJAY2fz7sIeWVfQHmcAA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"528847dd40eecba0074e4efe9c14fc86a3efb1f1c453b83de579a1a79a534e6d","last_reissued_at":"2026-05-17T23:45:10.642061Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:45:10.642061Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Unsupervised Machine Learning for the Discovery of Latent Disease Clusters and Patient Subgroups Using Electronic Health Records","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","cs.IR"],"primary_cat":"stat.AP","authors_text":"Ahmad P. Tafti, Andrew H. Limper, Elizabeth J. Atkinson, Hongfang Liu, Nan Zhang, Shreyasee Amin, Terry M. Therneau, Yanshan Wang, Yiqing Zhao","submitted_at":"2019-05-17T20:07:22Z","abstract_excerpt":"Machine learning has become ubiquitous and a key technology on mining electronic health records (EHRs) for facilitating clinical research and practice. Unsupervised machine learning, as opposed to supervised learning, has shown promise in identifying novel patterns and relations from EHRs without using human created labels. In this paper, we investigate the application of unsupervised machine learning models in discovering latent disease clusters and patient subgroups based on EHRs. We utilized Latent Dirichlet Allocation (LDA), a generative probabilistic model, and proposed a novel model name"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1905.10309","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1905.10309","created_at":"2026-05-17T23:45:10.642185+00:00"},{"alias_kind":"arxiv_version","alias_value":"1905.10309v1","created_at":"2026-05-17T23:45:10.642185+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1905.10309","created_at":"2026-05-17T23:45:10.642185+00:00"},{"alias_kind":"pith_short_12","alias_value":"KKEEPXKA53F2","created_at":"2026-05-18T12:33:21.387695+00:00"},{"alias_kind":"pith_short_16","alias_value":"KKEEPXKA53F2AB2O","created_at":"2026-05-18T12:33:21.387695+00:00"},{"alias_kind":"pith_short_8","alias_value":"KKEEPXKA","created_at":"2026-05-18T12:33:21.387695+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/KKEEPXKA53F2AB2OJ37JYFH4Q2","json":"https://pith.science/pith/KKEEPXKA53F2AB2OJ37JYFH4Q2.json","graph_json":"https://pith.science/api/pith-number/KKEEPXKA53F2AB2OJ37JYFH4Q2/graph.json","events_json":"https://pith.science/api/pith-number/KKEEPXKA53F2AB2OJ37JYFH4Q2/events.json","paper":"https://pith.science/paper/KKEEPXKA"},"agent_actions":{"view_html":"https://pith.science/pith/KKEEPXKA53F2AB2OJ37JYFH4Q2","download_json":"https://pith.science/pith/KKEEPXKA53F2AB2OJ37JYFH4Q2.json","view_paper":"https://pith.science/paper/KKEEPXKA","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1905.10309&json=true","fetch_graph":"https://pith.science/api/pith-number/KKEEPXKA53F2AB2OJ37JYFH4Q2/graph.json","fetch_events":"https://pith.science/api/pith-number/KKEEPXKA53F2AB2OJ37JYFH4Q2/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/KKEEPXKA53F2AB2OJ37JYFH4Q2/action/timestamp_anchor","attest_storage":"https://pith.science/pith/KKEEPXKA53F2AB2OJ37JYFH4Q2/action/storage_attestation","attest_author":"https://pith.science/pith/KKEEPXKA53F2AB2OJ37JYFH4Q2/action/author_attestation","sign_citation":"https://pith.science/pith/KKEEPXKA53F2AB2OJ37JYFH4Q2/action/citation_signature","submit_replication":"https://pith.science/pith/KKEEPXKA53F2AB2OJ37JYFH4Q2/action/replication_record"}},"created_at":"2026-05-17T23:45:10.642185+00:00","updated_at":"2026-05-17T23:45:10.642185+00:00"}