{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2016:3EU6FVZW3NBIOHXNITZGLCPDPR","short_pith_number":"pith:3EU6FVZW","canonical_record":{"source":{"id":"1604.06602","kind":"arxiv","version":7},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2016-04-22T10:53:48Z","cross_cats_sorted":[],"title_canon_sha256":"1c869de26d2048608acdea31d1d6ef8c174e212d6d0eafc067781981848a7b1a","abstract_canon_sha256":"8fccc6de97071832f6d9d4c0313478affcbd6424a98d5124681c3ef26276b006"},"schema_version":"1.0"},"canonical_sha256":"d929e2d736db42871eed44f26589e37c5840811e91df5350e9222eb228f51cdf","source":{"kind":"arxiv","id":"1604.06602","version":7},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1604.06602","created_at":"2026-05-18T00:11:19Z"},{"alias_kind":"arxiv_version","alias_value":"1604.06602v7","created_at":"2026-05-18T00:11:19Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1604.06602","created_at":"2026-05-18T00:11:19Z"},{"alias_kind":"pith_short_12","alias_value":"3EU6FVZW3NBI","created_at":"2026-05-18T12:29:55Z"},{"alias_kind":"pith_short_16","alias_value":"3EU6FVZW3NBIOHXN","created_at":"2026-05-18T12:29:55Z"},{"alias_kind":"pith_short_8","alias_value":"3EU6FVZW","created_at":"2026-05-18T12:29:55Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2016:3EU6FVZW3NBIOHXNITZGLCPDPR","target":"record","payload":{"canonical_record":{"source":{"id":"1604.06602","kind":"arxiv","version":7},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2016-04-22T10:53:48Z","cross_cats_sorted":[],"title_canon_sha256":"1c869de26d2048608acdea31d1d6ef8c174e212d6d0eafc067781981848a7b1a","abstract_canon_sha256":"8fccc6de97071832f6d9d4c0313478affcbd6424a98d5124681c3ef26276b006"},"schema_version":"1.0"},"canonical_sha256":"d929e2d736db42871eed44f26589e37c5840811e91df5350e9222eb228f51cdf","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:11:19.361562Z","signature_b64":"pxYUdBDjUB+7BvQcovUGrmlChuB2tvAhmyW0PlvoWE8ZsYEBQ1jEhrvn5vJK3Lpw0Ozlj7dffpu1HJqfkxqpBw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"d929e2d736db42871eed44f26589e37c5840811e91df5350e9222eb228f51cdf","last_reissued_at":"2026-05-18T00:11:19.360960Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:11:19.360960Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1604.06602","source_version":7,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:11:19Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"gbwZG1zuHvuZysV5DllE/c+g7Trns2enio98ro0/46cs7fvFEgTYKswSpoqhMNJA+O2pLiPPx3jhwfNMTjrtAg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-27T15:57:44.556397Z"},"content_sha256":"35461cdcda6d597393241e48509cd8b8ac2c9ca162437e70349784cb84dcba2a","schema_version":"1.0","event_id":"sha256:35461cdcda6d597393241e48509cd8b8ac2c9ca162437e70349784cb84dcba2a"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2016:3EU6FVZW3NBIOHXNITZGLCPDPR","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Clustering with Missing Features: A Penalized Dissimilarity Measure based approach","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.LG","authors_text":"Shounak Datta, Supritam Bhattacharjee, Swagatam Das","submitted_at":"2016-04-22T10:53:48Z","abstract_excerpt":"Many real-world clustering problems are plagued by incomplete data characterized by missing or absent features for some or all of the data instances. Traditional clustering methods cannot be directly applied to such data without preprocessing by imputation or marginalization techniques. In this article, we overcome this drawback by utilizing a penalized dissimilarity measure which we refer to as the Feature Weighted Penalty based Dissimilarity (FWPD). Using the FWPD measure, we modify the traditional k-means clustering algorithm and the standard hierarchical agglomerative clustering algorithms"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1604.06602","kind":"arxiv","version":7},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:11:19Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"LVk0MmoaLcAgRshYsv3bmkRwCBeB3R9EWkzeiiIwXXMtnFNsxvxoW4PjZTez6EB7OHf1CMncn3fuKMew1l4HAw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-27T15:57:44.557009Z"},"content_sha256":"5a7bd644f7b9162e921cf838614ca38c2fb944853858b458c8fe7fa4e710effc","schema_version":"1.0","event_id":"sha256:5a7bd644f7b9162e921cf838614ca38c2fb944853858b458c8fe7fa4e710effc"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/3EU6FVZW3NBIOHXNITZGLCPDPR/bundle.json","state_url":"https://pith.science/pith/3EU6FVZW3NBIOHXNITZGLCPDPR/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/3EU6FVZW3NBIOHXNITZGLCPDPR/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-27T15:57:44Z","links":{"resolver":"https://pith.science/pith/3EU6FVZW3NBIOHXNITZGLCPDPR","bundle":"https://pith.science/pith/3EU6FVZW3NBIOHXNITZGLCPDPR/bundle.json","state":"https://pith.science/pith/3EU6FVZW3NBIOHXNITZGLCPDPR/state.json","well_known_bundle":"https://pith.science/.well-known/pith/3EU6FVZW3NBIOHXNITZGLCPDPR/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2016:3EU6FVZW3NBIOHXNITZGLCPDPR","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"8fccc6de97071832f6d9d4c0313478affcbd6424a98d5124681c3ef26276b006","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2016-04-22T10:53:48Z","title_canon_sha256":"1c869de26d2048608acdea31d1d6ef8c174e212d6d0eafc067781981848a7b1a"},"schema_version":"1.0","source":{"id":"1604.06602","kind":"arxiv","version":7}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1604.06602","created_at":"2026-05-18T00:11:19Z"},{"alias_kind":"arxiv_version","alias_value":"1604.06602v7","created_at":"2026-05-18T00:11:19Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1604.06602","created_at":"2026-05-18T00:11:19Z"},{"alias_kind":"pith_short_12","alias_value":"3EU6FVZW3NBI","created_at":"2026-05-18T12:29:55Z"},{"alias_kind":"pith_short_16","alias_value":"3EU6FVZW3NBIOHXN","created_at":"2026-05-18T12:29:55Z"},{"alias_kind":"pith_short_8","alias_value":"3EU6FVZW","created_at":"2026-05-18T12:29:55Z"}],"graph_snapshots":[{"event_id":"sha256:5a7bd644f7b9162e921cf838614ca38c2fb944853858b458c8fe7fa4e710effc","target":"graph","created_at":"2026-05-18T00:11:19Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Many real-world clustering problems are plagued by incomplete data characterized by missing or absent features for some or all of the data instances. Traditional clustering methods cannot be directly applied to such data without preprocessing by imputation or marginalization techniques. In this article, we overcome this drawback by utilizing a penalized dissimilarity measure which we refer to as the Feature Weighted Penalty based Dissimilarity (FWPD). Using the FWPD measure, we modify the traditional k-means clustering algorithm and the standard hierarchical agglomerative clustering algorithms","authors_text":"Shounak Datta, Supritam Bhattacharjee, Swagatam Das","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2016-04-22T10:53:48Z","title":"Clustering with Missing Features: A Penalized Dissimilarity Measure based approach"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1604.06602","kind":"arxiv","version":7},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:35461cdcda6d597393241e48509cd8b8ac2c9ca162437e70349784cb84dcba2a","target":"record","created_at":"2026-05-18T00:11:19Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"8fccc6de97071832f6d9d4c0313478affcbd6424a98d5124681c3ef26276b006","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2016-04-22T10:53:48Z","title_canon_sha256":"1c869de26d2048608acdea31d1d6ef8c174e212d6d0eafc067781981848a7b1a"},"schema_version":"1.0","source":{"id":"1604.06602","kind":"arxiv","version":7}},"canonical_sha256":"d929e2d736db42871eed44f26589e37c5840811e91df5350e9222eb228f51cdf","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"d929e2d736db42871eed44f26589e37c5840811e91df5350e9222eb228f51cdf","first_computed_at":"2026-05-18T00:11:19.360960Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:11:19.360960Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"pxYUdBDjUB+7BvQcovUGrmlChuB2tvAhmyW0PlvoWE8ZsYEBQ1jEhrvn5vJK3Lpw0Ozlj7dffpu1HJqfkxqpBw==","signature_status":"signed_v1","signed_at":"2026-05-18T00:11:19.361562Z","signed_message":"canonical_sha256_bytes"},"source_id":"1604.06602","source_kind":"arxiv","source_version":7}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:35461cdcda6d597393241e48509cd8b8ac2c9ca162437e70349784cb84dcba2a","sha256:5a7bd644f7b9162e921cf838614ca38c2fb944853858b458c8fe7fa4e710effc"],"state_sha256":"6e721321f4557e8b7f3c9da1df95ab55277ea7a993549555764cb0648d715890"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"zzsdzsWvplntqXtQaK3sDmcxil7XEMtNAPgoUUxPkZpNJmRIY1xIK8z3izb9Hj7mYr7KlBra0/ciHfih3J0rBw==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-27T15:57:44.560133Z","bundle_sha256":"894f8f1da5b0ce9b5824ef4e31676bfa2b2c57a0fd8dd5e23a47fd2a3b5e3bae"}}