{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2019:NGTRCLQMS5XJZA7N7SZSBLPDR3","short_pith_number":"pith:NGTRCLQM","canonical_record":{"source":{"id":"1905.08977","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DS","submitted_at":"2019-05-22T06:39:48Z","cross_cats_sorted":[],"title_canon_sha256":"5cc23796546ffef8ab4caf7331b647b5daa6db76f060b11129de813a34e7ace4","abstract_canon_sha256":"6c4fa27c007a5d3a984e82f0355c2adeb42d5b875e3899a4a0f38a1fbcb577fc"},"schema_version":"1.0"},"canonical_sha256":"69a7112e0c976e9c83edfcb320ade38ecb6a81d6e5767925b99421209fdb9af7","source":{"kind":"arxiv","id":"1905.08977","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1905.08977","created_at":"2026-05-17T23:45:24Z"},{"alias_kind":"arxiv_version","alias_value":"1905.08977v1","created_at":"2026-05-17T23:45:24Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1905.08977","created_at":"2026-05-17T23:45:24Z"},{"alias_kind":"pith_short_12","alias_value":"NGTRCLQMS5XJ","created_at":"2026-05-18T12:33:24Z"},{"alias_kind":"pith_short_16","alias_value":"NGTRCLQMS5XJZA7N","created_at":"2026-05-18T12:33:24Z"},{"alias_kind":"pith_short_8","alias_value":"NGTRCLQM","created_at":"2026-05-18T12:33:24Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2019:NGTRCLQMS5XJZA7N7SZSBLPDR3","target":"record","payload":{"canonical_record":{"source":{"id":"1905.08977","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DS","submitted_at":"2019-05-22T06:39:48Z","cross_cats_sorted":[],"title_canon_sha256":"5cc23796546ffef8ab4caf7331b647b5daa6db76f060b11129de813a34e7ace4","abstract_canon_sha256":"6c4fa27c007a5d3a984e82f0355c2adeb42d5b875e3899a4a0f38a1fbcb577fc"},"schema_version":"1.0"},"canonical_sha256":"69a7112e0c976e9c83edfcb320ade38ecb6a81d6e5767925b99421209fdb9af7","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:45:24.703114Z","signature_b64":"UGmuORrnVGZIMwIqMM/kPH3fV8sDXQyl4uWoej8n9xYojUdvStwDzyBRgOpkCn9ZL09l7buXc7Obb2jQ5NEPBg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"69a7112e0c976e9c83edfcb320ade38ecb6a81d6e5767925b99421209fdb9af7","last_reissued_at":"2026-05-17T23:45:24.702584Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:45:24.702584Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1905.08977","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:45:24Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"ifj6mYHHh2OJobAegpQ9RgYIGY54/4GR+2d9S0hpOwaBUJpCxzXcMvDxc3aKTpe//zvRedFXh6RVf5tdahsKAA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-29T17:51:38.192232Z"},"content_sha256":"d56ab67503ead07153a430e60c49a6356d1bf0153adb944c9ab8eeb53a6599b2","schema_version":"1.0","event_id":"sha256:d56ab67503ead07153a430e60c49a6356d1bf0153adb944c9ab8eeb53a6599b2"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2019:NGTRCLQMS5XJZA7N7SZSBLPDR3","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"A Memory-Efficient Sketch Method for Estimating High Similarities in Streaming Sets","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.DS","authors_text":"Chenxu Wang, John C.S. Lui, Pinghui Wang, Qiaozhu Zhai, Xiaohong Guan, Yiyan Qi, Yuanming Zhang","submitted_at":"2019-05-22T06:39:48Z","abstract_excerpt":"Estimating set similarity and detecting highly similar sets are fundamental problems in areas such as databases, machine learning, and information retrieval. MinHash is a well-known technique for approximating Jaccard similarity of sets and has been successfully used for many applications such as similarity search and large scale learning. Its two compressed versions, b-bit MinHash and Odd Sketch, can significantly reduce the memory usage of the original MinHash method, especially for estimating high similarities (i.e., similarities around 1). Although MinHash can be applied to static sets as "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1905.08977","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:45:24Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"xmS0w1RL1iOtaRNrdaWjdC33xKT871bQWFQs9bXFqzN92veFSgRaOPRxC6/mYMdNMMY6nURrCmuGk2k6t729BA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-29T17:51:38.192831Z"},"content_sha256":"49e29497f21fd75a21dffaa5171ae0324f6f5bbd4617c10e11521e3058346766","schema_version":"1.0","event_id":"sha256:49e29497f21fd75a21dffaa5171ae0324f6f5bbd4617c10e11521e3058346766"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/NGTRCLQMS5XJZA7N7SZSBLPDR3/bundle.json","state_url":"https://pith.science/pith/NGTRCLQMS5XJZA7N7SZSBLPDR3/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/NGTRCLQMS5XJZA7N7SZSBLPDR3/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-29T17:51:38Z","links":{"resolver":"https://pith.science/pith/NGTRCLQMS5XJZA7N7SZSBLPDR3","bundle":"https://pith.science/pith/NGTRCLQMS5XJZA7N7SZSBLPDR3/bundle.json","state":"https://pith.science/pith/NGTRCLQMS5XJZA7N7SZSBLPDR3/state.json","well_known_bundle":"https://pith.science/.well-known/pith/NGTRCLQMS5XJZA7N7SZSBLPDR3/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2019:NGTRCLQMS5XJZA7N7SZSBLPDR3","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"6c4fa27c007a5d3a984e82f0355c2adeb42d5b875e3899a4a0f38a1fbcb577fc","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DS","submitted_at":"2019-05-22T06:39:48Z","title_canon_sha256":"5cc23796546ffef8ab4caf7331b647b5daa6db76f060b11129de813a34e7ace4"},"schema_version":"1.0","source":{"id":"1905.08977","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1905.08977","created_at":"2026-05-17T23:45:24Z"},{"alias_kind":"arxiv_version","alias_value":"1905.08977v1","created_at":"2026-05-17T23:45:24Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1905.08977","created_at":"2026-05-17T23:45:24Z"},{"alias_kind":"pith_short_12","alias_value":"NGTRCLQMS5XJ","created_at":"2026-05-18T12:33:24Z"},{"alias_kind":"pith_short_16","alias_value":"NGTRCLQMS5XJZA7N","created_at":"2026-05-18T12:33:24Z"},{"alias_kind":"pith_short_8","alias_value":"NGTRCLQM","created_at":"2026-05-18T12:33:24Z"}],"graph_snapshots":[{"event_id":"sha256:49e29497f21fd75a21dffaa5171ae0324f6f5bbd4617c10e11521e3058346766","target":"graph","created_at":"2026-05-17T23:45:24Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Estimating set similarity and detecting highly similar sets are fundamental problems in areas such as databases, machine learning, and information retrieval. MinHash is a well-known technique for approximating Jaccard similarity of sets and has been successfully used for many applications such as similarity search and large scale learning. Its two compressed versions, b-bit MinHash and Odd Sketch, can significantly reduce the memory usage of the original MinHash method, especially for estimating high similarities (i.e., similarities around 1). Although MinHash can be applied to static sets as ","authors_text":"Chenxu Wang, John C.S. Lui, Pinghui Wang, Qiaozhu Zhai, Xiaohong Guan, Yiyan Qi, Yuanming Zhang","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DS","submitted_at":"2019-05-22T06:39:48Z","title":"A Memory-Efficient Sketch Method for Estimating High Similarities in Streaming Sets"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1905.08977","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:d56ab67503ead07153a430e60c49a6356d1bf0153adb944c9ab8eeb53a6599b2","target":"record","created_at":"2026-05-17T23:45:24Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"6c4fa27c007a5d3a984e82f0355c2adeb42d5b875e3899a4a0f38a1fbcb577fc","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DS","submitted_at":"2019-05-22T06:39:48Z","title_canon_sha256":"5cc23796546ffef8ab4caf7331b647b5daa6db76f060b11129de813a34e7ace4"},"schema_version":"1.0","source":{"id":"1905.08977","kind":"arxiv","version":1}},"canonical_sha256":"69a7112e0c976e9c83edfcb320ade38ecb6a81d6e5767925b99421209fdb9af7","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"69a7112e0c976e9c83edfcb320ade38ecb6a81d6e5767925b99421209fdb9af7","first_computed_at":"2026-05-17T23:45:24.702584Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:45:24.702584Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"UGmuORrnVGZIMwIqMM/kPH3fV8sDXQyl4uWoej8n9xYojUdvStwDzyBRgOpkCn9ZL09l7buXc7Obb2jQ5NEPBg==","signature_status":"signed_v1","signed_at":"2026-05-17T23:45:24.703114Z","signed_message":"canonical_sha256_bytes"},"source_id":"1905.08977","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:d56ab67503ead07153a430e60c49a6356d1bf0153adb944c9ab8eeb53a6599b2","sha256:49e29497f21fd75a21dffaa5171ae0324f6f5bbd4617c10e11521e3058346766"],"state_sha256":"1aa9dc3ce055e8ab2b91334f90a86f503399c8906008812b22e94dc8efac42a4"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"ZPTyE8F4o0WwbiO1Beh1OUb1jyeNW8GZjal6ckUGkvgvq2D3DSqA7WjQRykQHnDg3NBpK4SlG5zCWu2VMNvoBA==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-29T17:51:38.195595Z","bundle_sha256":"862a2bab62f227b1f8de8bb24fba0f10a5192d9083aaaebdffd6c72eb6f12157"}}