{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:NCO5TPD4ZVLEOJVC4PVRQ5RDNQ","short_pith_number":"pith:NCO5TPD4","schema_version":"1.0","canonical_sha256":"689dd9bc7ccd564726a2e3eb1876236c21df601f2c51227a52669118aebbf324","source":{"kind":"arxiv","id":"2604.07085","version":2},"attestation_state":"computed","paper":{"title":"Mining Electronic Health Records to Investigate Effectiveness of Ensemble Deep Clustering","license":"http://creativecommons.org/licenses/by-sa/4.0/","headline":"An ensemble deep clustering method combined with traditional techniques achieves the highest performance in grouping heart failure patients from electronic health records.","cross_cats":[],"primary_cat":"cs.LG","authors_text":"Manar D. Samad, Shrabani Ghosh, Yina Hou","submitted_at":"2026-04-08T13:40:05Z","abstract_excerpt":"In electronic health records (EHRs), clustering patients and distinguishing disease subtypes are key tasks to elucidate pathophysiology and aid clinical decision-making. However, clustering in healthcare informatics is still based on traditional methods, especially K-means, and has achieved limited success when applied to embedding representations learned by autoencoders as hybrid methods. This paper investigates the effectiveness of traditional, hybrid, and deep learning methods in heart failure patient cohorts using real EHR data from the All of Us Research Program. Traditional clustering me"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":true},"canonical_record":{"source":{"id":"2604.07085","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by-sa/4.0/","primary_cat":"cs.LG","submitted_at":"2026-04-08T13:40:05Z","cross_cats_sorted":[],"title_canon_sha256":"b063613b30dbf1e2f622ceaa4f73b610ca7c31b6f7328d0e82b9c83ca41c7835","abstract_canon_sha256":"df72c1ae45065f82f8fc4357c7f6e4497abe9b22421b72bfc77d992eac34b683"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-10T01:08:35.171988Z","signature_b64":"/xoQsmpuWKUO5hrxRJAYiZmTgO4eEtPHJUf5elsVGr6Iw5fyHC7OamGMPD+ETsbWvAwuDjZn/uif6Pz0L4w6Ag==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"689dd9bc7ccd564726a2e3eb1876236c21df601f2c51227a52669118aebbf324","last_reissued_at":"2026-06-10T01:08:35.171114Z","signature_status":"signed_v1","first_computed_at":"2026-06-10T01:08:35.171114Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Mining Electronic Health Records to Investigate Effectiveness of Ensemble Deep Clustering","license":"http://creativecommons.org/licenses/by-sa/4.0/","headline":"An ensemble deep clustering method combined with traditional techniques achieves the highest performance in grouping heart failure patients from electronic health records.","cross_cats":[],"primary_cat":"cs.LG","authors_text":"Manar D. Samad, Shrabani Ghosh, Yina Hou","submitted_at":"2026-04-08T13:40:05Z","abstract_excerpt":"In electronic health records (EHRs), clustering patients and distinguishing disease subtypes are key tasks to elucidate pathophysiology and aid clinical decision-making. However, clustering in healthcare informatics is still based on traditional methods, especially K-means, and has achieved limited success when applied to embedding representations learned by autoencoders as hybrid methods. This paper investigates the effectiveness of traditional, hybrid, and deep learning methods in heart failure patient cohorts using real EHR data from the All of Us Research Program. Traditional clustering me"},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"When combined with traditional clustering in a novel ensemble framework, the proposed ensemble embedding for deep clustering delivers the best overall performance ranking across 14 diverse clustering methods and multiple patient cohorts.","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"That deep learning methods designed for image data inherently underperform on tabular EHR data and that aggregating assignments from multiple embedding dimensions reliably improves clustering quality without overfitting or selection bias.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"An ensemble deep clustering framework combined with traditional methods ranks highest across 14 clustering techniques on real EHR data for heart failure patients from the All of Us program.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"An ensemble deep clustering method combined with traditional techniques achieves the highest performance in grouping heart failure patients from electronic health records.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"94f8c5b5f32303d2070194cad9f6c0ad787bc323db3663c1229f191d7a2af171"},"source":{"id":"2604.07085","kind":"arxiv","version":2},"verdict":{"id":"289fa50f-bde7-41ab-8196-c06f3e0d43d7","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-10T18:37:50.035665Z","strongest_claim":"When combined with traditional clustering in a novel ensemble framework, the proposed ensemble embedding for deep clustering delivers the best overall performance ranking across 14 diverse clustering methods and multiple patient cohorts.","one_line_summary":"An ensemble deep clustering framework combined with traditional methods ranks highest across 14 clustering techniques on real EHR data for heart failure patients from the All of Us program.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"That deep learning methods designed for image data inherently underperform on tabular EHR data and that aggregating assignments from multiple embedding dimensions reliably improves clustering quality without overfitting or selection bias.","pith_extraction_headline":"An ensemble deep clustering method combined with traditional techniques achieves the highest performance in grouping heart failure patients from electronic health records."},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2604.07085/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":2,"snapshot_sha256":"ff2949ab3adbeddd207b3f2095d0a46acb8566fc231b2862d96c1ed47f0dac1c"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2604.07085","created_at":"2026-06-10T01:08:35.171243+00:00"},{"alias_kind":"arxiv_version","alias_value":"2604.07085v2","created_at":"2026-06-10T01:08:35.171243+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2604.07085","created_at":"2026-06-10T01:08:35.171243+00:00"},{"alias_kind":"pith_short_12","alias_value":"NCO5TPD4ZVLE","created_at":"2026-06-10T01:08:35.171243+00:00"},{"alias_kind":"pith_short_16","alias_value":"NCO5TPD4ZVLEOJVC","created_at":"2026-06-10T01:08:35.171243+00:00"},{"alias_kind":"pith_short_8","alias_value":"NCO5TPD4","created_at":"2026-06-10T01:08:35.171243+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":2,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/NCO5TPD4ZVLEOJVC4PVRQ5RDNQ","json":"https://pith.science/pith/NCO5TPD4ZVLEOJVC4PVRQ5RDNQ.json","graph_json":"https://pith.science/api/pith-number/NCO5TPD4ZVLEOJVC4PVRQ5RDNQ/graph.json","events_json":"https://pith.science/api/pith-number/NCO5TPD4ZVLEOJVC4PVRQ5RDNQ/events.json","paper":"https://pith.science/paper/NCO5TPD4"},"agent_actions":{"view_html":"https://pith.science/pith/NCO5TPD4ZVLEOJVC4PVRQ5RDNQ","download_json":"https://pith.science/pith/NCO5TPD4ZVLEOJVC4PVRQ5RDNQ.json","view_paper":"https://pith.science/paper/NCO5TPD4","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2604.07085&json=true","fetch_graph":"https://pith.science/api/pith-number/NCO5TPD4ZVLEOJVC4PVRQ5RDNQ/graph.json","fetch_events":"https://pith.science/api/pith-number/NCO5TPD4ZVLEOJVC4PVRQ5RDNQ/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/NCO5TPD4ZVLEOJVC4PVRQ5RDNQ/action/timestamp_anchor","attest_storage":"https://pith.science/pith/NCO5TPD4ZVLEOJVC4PVRQ5RDNQ/action/storage_attestation","attest_author":"https://pith.science/pith/NCO5TPD4ZVLEOJVC4PVRQ5RDNQ/action/author_attestation","sign_citation":"https://pith.science/pith/NCO5TPD4ZVLEOJVC4PVRQ5RDNQ/action/citation_signature","submit_replication":"https://pith.science/pith/NCO5TPD4ZVLEOJVC4PVRQ5RDNQ/action/replication_record"}},"created_at":"2026-06-10T01:08:35.171243+00:00","updated_at":"2026-06-10T01:08:35.171243+00:00"}