{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2015:RTLX3XR5ILL64OL7NUH4TXPV72","short_pith_number":"pith:RTLX3XR5","canonical_record":{"source":{"id":"1506.07930","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"stat.ML","submitted_at":"2015-06-26T00:48:17Z","cross_cats_sorted":[],"title_canon_sha256":"82e76074f4c513896a99c768a421702ce26d9b1c49714371e2077ddf19d053c7","abstract_canon_sha256":"e058cc48b54a7c492a29674b1241944ab0a646010d6bfd0fc8f15757e50dbcd8"},"schema_version":"1.0"},"canonical_sha256":"8cd77dde3d42d7ee397f6d0fc9ddf5feaa2aa6d0f20597dce769e0125a611c8a","source":{"kind":"arxiv","id":"1506.07930","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1506.07930","created_at":"2026-05-18T00:34:53Z"},{"alias_kind":"arxiv_version","alias_value":"1506.07930v1","created_at":"2026-05-18T00:34:53Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1506.07930","created_at":"2026-05-18T00:34:53Z"},{"alias_kind":"pith_short_12","alias_value":"RTLX3XR5ILL6","created_at":"2026-05-18T12:29:39Z"},{"alias_kind":"pith_short_16","alias_value":"RTLX3XR5ILL64OL7","created_at":"2026-05-18T12:29:39Z"},{"alias_kind":"pith_short_8","alias_value":"RTLX3XR5","created_at":"2026-05-18T12:29:39Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2015:RTLX3XR5ILL64OL7NUH4TXPV72","target":"record","payload":{"canonical_record":{"source":{"id":"1506.07930","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"stat.ML","submitted_at":"2015-06-26T00:48:17Z","cross_cats_sorted":[],"title_canon_sha256":"82e76074f4c513896a99c768a421702ce26d9b1c49714371e2077ddf19d053c7","abstract_canon_sha256":"e058cc48b54a7c492a29674b1241944ab0a646010d6bfd0fc8f15757e50dbcd8"},"schema_version":"1.0"},"canonical_sha256":"8cd77dde3d42d7ee397f6d0fc9ddf5feaa2aa6d0f20597dce769e0125a611c8a","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:34:53.058867Z","signature_b64":"9/dTs9UYvZN0ahL3bKBxoQnMFAUPo5+ckrweElbn/9K6Wup/lJAcaKuwIzugPZ45XraDLHSSuOcVtNg3gv70Dw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"8cd77dde3d42d7ee397f6d0fc9ddf5feaa2aa6d0f20597dce769e0125a611c8a","last_reissued_at":"2026-05-18T00:34:53.058214Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:34:53.058214Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1506.07930","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:34:53Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"fD4AVVbSdW/GNuotqy0wWAShCMP1hBLStI+6KTBg5PjHK2o2FdISQhZMe5ecM2zbSewoYfwdRQEcBY5zfpHpCg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-05T20:31:13.184421Z"},"content_sha256":"0d0619c83745cfb6a3799a66c0331a289376711d62e24264b1add34a6c215241","schema_version":"1.0","event_id":"sha256:0d0619c83745cfb6a3799a66c0331a289376711d62e24264b1add34a6c215241"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2015:RTLX3XR5ILL64OL7NUH4TXPV72","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Clustering categorical data via ensembling dissimilarity matrices","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"stat.ML","authors_text":"Bertrand Clarke, Jennifer Clarke, Saeid Amiri","submitted_at":"2015-06-26T00:48:17Z","abstract_excerpt":"We present a technique for clustering categorical data by generating many dissimilarity matrices and averaging over them. We begin by demonstrating our technique on low dimensional categorical data and comparing it to several other techniques that have been proposed. Then we give conditions under which our method should yield good results in general. Our method extends to high dimensional categorical data of equal lengths by ensembling over many choices of explanatory variables. In this context we compare our method with two other methods. Finally, we extend our method to high dimensional cate"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1506.07930","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:34:53Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"tDyoaoMEkX5Uaz6C7UqTZ2/YmXd7iUqRaxA33yjIxB6f4urzWtpApkffv7/ucDkXEoEzNlgkoZz8w4n6W5eQDg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-05T20:31:13.185118Z"},"content_sha256":"7fdb00bce4718ceb4c0235bc5d8a6f5d21a02e0837d414f7ac94efd858134e3a","schema_version":"1.0","event_id":"sha256:7fdb00bce4718ceb4c0235bc5d8a6f5d21a02e0837d414f7ac94efd858134e3a"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/RTLX3XR5ILL64OL7NUH4TXPV72/bundle.json","state_url":"https://pith.science/pith/RTLX3XR5ILL64OL7NUH4TXPV72/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/RTLX3XR5ILL64OL7NUH4TXPV72/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-05T20:31:13Z","links":{"resolver":"https://pith.science/pith/RTLX3XR5ILL64OL7NUH4TXPV72","bundle":"https://pith.science/pith/RTLX3XR5ILL64OL7NUH4TXPV72/bundle.json","state":"https://pith.science/pith/RTLX3XR5ILL64OL7NUH4TXPV72/state.json","well_known_bundle":"https://pith.science/.well-known/pith/RTLX3XR5ILL64OL7NUH4TXPV72/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2015:RTLX3XR5ILL64OL7NUH4TXPV72","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"e058cc48b54a7c492a29674b1241944ab0a646010d6bfd0fc8f15757e50dbcd8","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"stat.ML","submitted_at":"2015-06-26T00:48:17Z","title_canon_sha256":"82e76074f4c513896a99c768a421702ce26d9b1c49714371e2077ddf19d053c7"},"schema_version":"1.0","source":{"id":"1506.07930","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1506.07930","created_at":"2026-05-18T00:34:53Z"},{"alias_kind":"arxiv_version","alias_value":"1506.07930v1","created_at":"2026-05-18T00:34:53Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1506.07930","created_at":"2026-05-18T00:34:53Z"},{"alias_kind":"pith_short_12","alias_value":"RTLX3XR5ILL6","created_at":"2026-05-18T12:29:39Z"},{"alias_kind":"pith_short_16","alias_value":"RTLX3XR5ILL64OL7","created_at":"2026-05-18T12:29:39Z"},{"alias_kind":"pith_short_8","alias_value":"RTLX3XR5","created_at":"2026-05-18T12:29:39Z"}],"graph_snapshots":[{"event_id":"sha256:7fdb00bce4718ceb4c0235bc5d8a6f5d21a02e0837d414f7ac94efd858134e3a","target":"graph","created_at":"2026-05-18T00:34:53Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"We present a technique for clustering categorical data by generating many dissimilarity matrices and averaging over them. We begin by demonstrating our technique on low dimensional categorical data and comparing it to several other techniques that have been proposed. Then we give conditions under which our method should yield good results in general. Our method extends to high dimensional categorical data of equal lengths by ensembling over many choices of explanatory variables. In this context we compare our method with two other methods. Finally, we extend our method to high dimensional cate","authors_text":"Bertrand Clarke, Jennifer Clarke, Saeid Amiri","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"stat.ML","submitted_at":"2015-06-26T00:48:17Z","title":"Clustering categorical data via ensembling dissimilarity matrices"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1506.07930","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:0d0619c83745cfb6a3799a66c0331a289376711d62e24264b1add34a6c215241","target":"record","created_at":"2026-05-18T00:34:53Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"e058cc48b54a7c492a29674b1241944ab0a646010d6bfd0fc8f15757e50dbcd8","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"stat.ML","submitted_at":"2015-06-26T00:48:17Z","title_canon_sha256":"82e76074f4c513896a99c768a421702ce26d9b1c49714371e2077ddf19d053c7"},"schema_version":"1.0","source":{"id":"1506.07930","kind":"arxiv","version":1}},"canonical_sha256":"8cd77dde3d42d7ee397f6d0fc9ddf5feaa2aa6d0f20597dce769e0125a611c8a","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"8cd77dde3d42d7ee397f6d0fc9ddf5feaa2aa6d0f20597dce769e0125a611c8a","first_computed_at":"2026-05-18T00:34:53.058214Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:34:53.058214Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"9/dTs9UYvZN0ahL3bKBxoQnMFAUPo5+ckrweElbn/9K6Wup/lJAcaKuwIzugPZ45XraDLHSSuOcVtNg3gv70Dw==","signature_status":"signed_v1","signed_at":"2026-05-18T00:34:53.058867Z","signed_message":"canonical_sha256_bytes"},"source_id":"1506.07930","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:0d0619c83745cfb6a3799a66c0331a289376711d62e24264b1add34a6c215241","sha256:7fdb00bce4718ceb4c0235bc5d8a6f5d21a02e0837d414f7ac94efd858134e3a"],"state_sha256":"660850151c3094ef4c1b0ce18df066df98c7de38086a3f58897c14be04fa6f47"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"RSroVA7o50M5JRMH0KuUZHg7pcQrek45Z8HqQpDIXfV7Uh55g9e0xI/znVqHd94IRsa+7kEnLd72+UBOKUvcDw==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-05T20:31:13.188983Z","bundle_sha256":"791cfc8bae5338c8d9da8191de9df654be48e6526325b4b079cb1ac6cd4ace52"}}