{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2019:PCHMGKKQH7H3A3I54NGQ3ZFXEX","short_pith_number":"pith:PCHMGKKQ","canonical_record":{"source":{"id":"1902.01334","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DS","submitted_at":"2019-02-04T17:45:55Z","cross_cats_sorted":[],"title_canon_sha256":"daa0e9152ce04e7d0095d2975069aef3b42d7eb683eaa8e7b75914d6b924b588","abstract_canon_sha256":"6855574eab9508e056759383dd0dc8766a5e7c7edb934ce382f2f8bdcd7de6d0"},"schema_version":"1.0"},"canonical_sha256":"788ec329503fcfb06d1de34d0de4b725df530a8fe700fbd69a494c5826ab9870","source":{"kind":"arxiv","id":"1902.01334","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1902.01334","created_at":"2026-05-17T23:54:49Z"},{"alias_kind":"arxiv_version","alias_value":"1902.01334v1","created_at":"2026-05-17T23:54:49Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1902.01334","created_at":"2026-05-17T23:54:49Z"},{"alias_kind":"pith_short_12","alias_value":"PCHMGKKQH7H3","created_at":"2026-05-18T12:33:24Z"},{"alias_kind":"pith_short_16","alias_value":"PCHMGKKQH7H3A3I5","created_at":"2026-05-18T12:33:24Z"},{"alias_kind":"pith_short_8","alias_value":"PCHMGKKQ","created_at":"2026-05-18T12:33:24Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2019:PCHMGKKQH7H3A3I54NGQ3ZFXEX","target":"record","payload":{"canonical_record":{"source":{"id":"1902.01334","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DS","submitted_at":"2019-02-04T17:45:55Z","cross_cats_sorted":[],"title_canon_sha256":"daa0e9152ce04e7d0095d2975069aef3b42d7eb683eaa8e7b75914d6b924b588","abstract_canon_sha256":"6855574eab9508e056759383dd0dc8766a5e7c7edb934ce382f2f8bdcd7de6d0"},"schema_version":"1.0"},"canonical_sha256":"788ec329503fcfb06d1de34d0de4b725df530a8fe700fbd69a494c5826ab9870","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:54:49.141778Z","signature_b64":"jX6UM6HZUYgp4ficm3r7IKcbcvnJM///5rw/O3sZTiAvZO8qn3KPoXl5aI4J0TnANhTGGyWzOoKtKKR62QBeDA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"788ec329503fcfb06d1de34d0de4b725df530a8fe700fbd69a494c5826ab9870","last_reissued_at":"2026-05-17T23:54:49.141278Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:54:49.141278Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1902.01334","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:54:49Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"rGJOwxbcY2OwUAzaPfEy0qLtUfB/y0pxmSCDRtCu4dXiLBKwrfI+v0GSFcALxvcbfZ56JIwxPLXPsy1s4jELCA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-29T19:33:38.013041Z"},"content_sha256":"1f092a38d2e568aec36720d71cd9f2074546a992db957c3326c34d8502bcea90","schema_version":"1.0","event_id":"sha256:1f092a38d2e568aec36720d71cd9f2074546a992db957c3326c34d8502bcea90"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2019:PCHMGKKQH7H3A3I54NGQ3ZFXEX","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Distances between Data Sets Based on Summary Statistics","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.DS","authors_text":"Nikolaj Tatti","submitted_at":"2019-02-04T17:45:55Z","abstract_excerpt":"The concepts of similarity and distance are crucial in data mining. We consider the problem of defining the distance between two data sets by comparing summary statistics computed from the data sets. The initial definition of our distance is based on geometrical notions of certain sets of distributions. We show that this distance can be computed in cubic time and that it has several intuitive properties. We also show that this distance is the unique Mahalanobis distance satisfying certain assumptions. We also demonstrate that if we are dealing with binary data sets, then the distance can be re"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1902.01334","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:54:49Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"Q6mVo/EAKqcq8I+rEdApl1Bam4Xnn62Cj9qhP71fEBONQ10/nEeNsUC7pUx2YAx+Q4G2JKyGAT4tFPb3XF9DAQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-29T19:33:38.013453Z"},"content_sha256":"85f9dbfa29687d0dd02844c8394e50f25f6cffb5c2136cf133a185be979e41d7","schema_version":"1.0","event_id":"sha256:85f9dbfa29687d0dd02844c8394e50f25f6cffb5c2136cf133a185be979e41d7"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/PCHMGKKQH7H3A3I54NGQ3ZFXEX/bundle.json","state_url":"https://pith.science/pith/PCHMGKKQH7H3A3I54NGQ3ZFXEX/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/PCHMGKKQH7H3A3I54NGQ3ZFXEX/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-29T19:33:38Z","links":{"resolver":"https://pith.science/pith/PCHMGKKQH7H3A3I54NGQ3ZFXEX","bundle":"https://pith.science/pith/PCHMGKKQH7H3A3I54NGQ3ZFXEX/bundle.json","state":"https://pith.science/pith/PCHMGKKQH7H3A3I54NGQ3ZFXEX/state.json","well_known_bundle":"https://pith.science/.well-known/pith/PCHMGKKQH7H3A3I54NGQ3ZFXEX/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2019:PCHMGKKQH7H3A3I54NGQ3ZFXEX","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"6855574eab9508e056759383dd0dc8766a5e7c7edb934ce382f2f8bdcd7de6d0","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DS","submitted_at":"2019-02-04T17:45:55Z","title_canon_sha256":"daa0e9152ce04e7d0095d2975069aef3b42d7eb683eaa8e7b75914d6b924b588"},"schema_version":"1.0","source":{"id":"1902.01334","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1902.01334","created_at":"2026-05-17T23:54:49Z"},{"alias_kind":"arxiv_version","alias_value":"1902.01334v1","created_at":"2026-05-17T23:54:49Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1902.01334","created_at":"2026-05-17T23:54:49Z"},{"alias_kind":"pith_short_12","alias_value":"PCHMGKKQH7H3","created_at":"2026-05-18T12:33:24Z"},{"alias_kind":"pith_short_16","alias_value":"PCHMGKKQH7H3A3I5","created_at":"2026-05-18T12:33:24Z"},{"alias_kind":"pith_short_8","alias_value":"PCHMGKKQ","created_at":"2026-05-18T12:33:24Z"}],"graph_snapshots":[{"event_id":"sha256:85f9dbfa29687d0dd02844c8394e50f25f6cffb5c2136cf133a185be979e41d7","target":"graph","created_at":"2026-05-17T23:54:49Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"The concepts of similarity and distance are crucial in data mining. We consider the problem of defining the distance between two data sets by comparing summary statistics computed from the data sets. The initial definition of our distance is based on geometrical notions of certain sets of distributions. We show that this distance can be computed in cubic time and that it has several intuitive properties. We also show that this distance is the unique Mahalanobis distance satisfying certain assumptions. We also demonstrate that if we are dealing with binary data sets, then the distance can be re","authors_text":"Nikolaj Tatti","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DS","submitted_at":"2019-02-04T17:45:55Z","title":"Distances between Data Sets Based on Summary Statistics"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1902.01334","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:1f092a38d2e568aec36720d71cd9f2074546a992db957c3326c34d8502bcea90","target":"record","created_at":"2026-05-17T23:54:49Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"6855574eab9508e056759383dd0dc8766a5e7c7edb934ce382f2f8bdcd7de6d0","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DS","submitted_at":"2019-02-04T17:45:55Z","title_canon_sha256":"daa0e9152ce04e7d0095d2975069aef3b42d7eb683eaa8e7b75914d6b924b588"},"schema_version":"1.0","source":{"id":"1902.01334","kind":"arxiv","version":1}},"canonical_sha256":"788ec329503fcfb06d1de34d0de4b725df530a8fe700fbd69a494c5826ab9870","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"788ec329503fcfb06d1de34d0de4b725df530a8fe700fbd69a494c5826ab9870","first_computed_at":"2026-05-17T23:54:49.141278Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:54:49.141278Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"jX6UM6HZUYgp4ficm3r7IKcbcvnJM///5rw/O3sZTiAvZO8qn3KPoXl5aI4J0TnANhTGGyWzOoKtKKR62QBeDA==","signature_status":"signed_v1","signed_at":"2026-05-17T23:54:49.141778Z","signed_message":"canonical_sha256_bytes"},"source_id":"1902.01334","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:1f092a38d2e568aec36720d71cd9f2074546a992db957c3326c34d8502bcea90","sha256:85f9dbfa29687d0dd02844c8394e50f25f6cffb5c2136cf133a185be979e41d7"],"state_sha256":"77f3ade152c2a63ab85fc79072f2f99583434c2a75d3d2e589776e4960d39cb9"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"otr8fk7q5oxMuX6eYcRY2uOYJQkO7E+6MDAQE54+/DZ17zaTbjwWPYjTJK1jZadCAR35uNWO0f7sDp9TR1WtCg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-29T19:33:38.016198Z","bundle_sha256":"737044b4fd3aacb3129f63f7c6b9ea572479f93cdc507e3db8e87f0c9078d08f"}}