{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2011:UCYSBWJFVACKAMKV7DGMQT5DN5","short_pith_number":"pith:UCYSBWJF","canonical_record":{"source":{"id":"1111.7171","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DB","submitted_at":"2011-11-30T14:12:22Z","cross_cats_sorted":[],"title_canon_sha256":"30d228dd238ca6f31cb06d5138bf48063bf2553652f4f59b3c75af0880bdee06","abstract_canon_sha256":"09c6976c3859ec068a4c573d522827f12f513c8928070ff7bbdd8a5a3752e0f6"},"schema_version":"1.0"},"canonical_sha256":"a0b120d925a804a03155f8ccc84fa36f484a47c00a5292cb71b4cf5bbdfa9632","source":{"kind":"arxiv","id":"1111.7171","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1111.7171","created_at":"2026-05-18T04:07:18Z"},{"alias_kind":"arxiv_version","alias_value":"1111.7171v1","created_at":"2026-05-18T04:07:18Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1111.7171","created_at":"2026-05-18T04:07:18Z"},{"alias_kind":"pith_short_12","alias_value":"UCYSBWJFVACK","created_at":"2026-05-18T12:26:42Z"},{"alias_kind":"pith_short_16","alias_value":"UCYSBWJFVACKAMKV","created_at":"2026-05-18T12:26:42Z"},{"alias_kind":"pith_short_8","alias_value":"UCYSBWJF","created_at":"2026-05-18T12:26:42Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2011:UCYSBWJFVACKAMKV7DGMQT5DN5","target":"record","payload":{"canonical_record":{"source":{"id":"1111.7171","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DB","submitted_at":"2011-11-30T14:12:22Z","cross_cats_sorted":[],"title_canon_sha256":"30d228dd238ca6f31cb06d5138bf48063bf2553652f4f59b3c75af0880bdee06","abstract_canon_sha256":"09c6976c3859ec068a4c573d522827f12f513c8928070ff7bbdd8a5a3752e0f6"},"schema_version":"1.0"},"canonical_sha256":"a0b120d925a804a03155f8ccc84fa36f484a47c00a5292cb71b4cf5bbdfa9632","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T04:07:18.578358Z","signature_b64":"0a3Nff14J2czeWRwJqtVgu7uDvdwpSq74XdZT1Tmx93aWsVJEpTPSzSnZAKKLrZe+cWuzLCrXZGW1UsGWzpVCQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"a0b120d925a804a03155f8ccc84fa36f484a47c00a5292cb71b4cf5bbdfa9632","last_reissued_at":"2026-05-18T04:07:18.577691Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T04:07:18.577691Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1111.7171","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T04:07:18Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"VhsA0JvSaWHiu0ldt1v7eoGy/zOxiIqFcNC8zWX5jnhV+zuETKNCzgwKe0g8w/SKJCFIkZ5E5LKFZFn5qunvCg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-03T12:20:04.129571Z"},"content_sha256":"10b3ab863f04995f691caf3494d65cf714c9f443099a5d488ff6c1cc2c11bb02","schema_version":"1.0","event_id":"sha256:10b3ab863f04995f691caf3494d65cf714c9f443099a5d488ff6c1cc2c11bb02"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2011:UCYSBWJFVACKAMKV7DGMQT5DN5","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"PASS-JOIN: A Partition-based Method for Similarity Joins","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.DB","authors_text":"Dong Deng, Guoliang Li, Jianhua Feng, Jiannan Wang","submitted_at":"2011-11-30T14:12:22Z","abstract_excerpt":"As an essential operation in data cleaning, the similarity join has attracted considerable attention from the database community. In this paper, we study string similarity joins with edit-distance constraints, which find similar string pairs from two large sets of strings whose edit distance is within a given threshold. Existing algorithms are efficient either for short strings or for long strings, and there is no algorithm that can efficiently and adaptively support both short strings and long strings. To address this problem, we propose a partition-based method called Pass-Join. Pass-Join pa"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1111.7171","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T04:07:18Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"OKI+aDfQOxw9u21yscmG4rLRRCYRGgb7HOfUIZCm00SPw6EZ/heInPiCRvMOIYsalvJBqTWEEH0xWIcBiKPrAw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-03T12:20:04.129913Z"},"content_sha256":"edfeb09871b911a11c7dbc30e62dc82e983e11e8e425e2acbf6ab5e8e79800a5","schema_version":"1.0","event_id":"sha256:edfeb09871b911a11c7dbc30e62dc82e983e11e8e425e2acbf6ab5e8e79800a5"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/UCYSBWJFVACKAMKV7DGMQT5DN5/bundle.json","state_url":"https://pith.science/pith/UCYSBWJFVACKAMKV7DGMQT5DN5/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/UCYSBWJFVACKAMKV7DGMQT5DN5/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-03T12:20:04Z","links":{"resolver":"https://pith.science/pith/UCYSBWJFVACKAMKV7DGMQT5DN5","bundle":"https://pith.science/pith/UCYSBWJFVACKAMKV7DGMQT5DN5/bundle.json","state":"https://pith.science/pith/UCYSBWJFVACKAMKV7DGMQT5DN5/state.json","well_known_bundle":"https://pith.science/.well-known/pith/UCYSBWJFVACKAMKV7DGMQT5DN5/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2011:UCYSBWJFVACKAMKV7DGMQT5DN5","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"09c6976c3859ec068a4c573d522827f12f513c8928070ff7bbdd8a5a3752e0f6","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DB","submitted_at":"2011-11-30T14:12:22Z","title_canon_sha256":"30d228dd238ca6f31cb06d5138bf48063bf2553652f4f59b3c75af0880bdee06"},"schema_version":"1.0","source":{"id":"1111.7171","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1111.7171","created_at":"2026-05-18T04:07:18Z"},{"alias_kind":"arxiv_version","alias_value":"1111.7171v1","created_at":"2026-05-18T04:07:18Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1111.7171","created_at":"2026-05-18T04:07:18Z"},{"alias_kind":"pith_short_12","alias_value":"UCYSBWJFVACK","created_at":"2026-05-18T12:26:42Z"},{"alias_kind":"pith_short_16","alias_value":"UCYSBWJFVACKAMKV","created_at":"2026-05-18T12:26:42Z"},{"alias_kind":"pith_short_8","alias_value":"UCYSBWJF","created_at":"2026-05-18T12:26:42Z"}],"graph_snapshots":[{"event_id":"sha256:edfeb09871b911a11c7dbc30e62dc82e983e11e8e425e2acbf6ab5e8e79800a5","target":"graph","created_at":"2026-05-18T04:07:18Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"As an essential operation in data cleaning, the similarity join has attracted considerable attention from the database community. In this paper, we study string similarity joins with edit-distance constraints, which find similar string pairs from two large sets of strings whose edit distance is within a given threshold. Existing algorithms are efficient either for short strings or for long strings, and there is no algorithm that can efficiently and adaptively support both short strings and long strings. To address this problem, we propose a partition-based method called Pass-Join. Pass-Join pa","authors_text":"Dong Deng, Guoliang Li, Jianhua Feng, Jiannan Wang","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DB","submitted_at":"2011-11-30T14:12:22Z","title":"PASS-JOIN: A Partition-based Method for Similarity Joins"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1111.7171","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:10b3ab863f04995f691caf3494d65cf714c9f443099a5d488ff6c1cc2c11bb02","target":"record","created_at":"2026-05-18T04:07:18Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"09c6976c3859ec068a4c573d522827f12f513c8928070ff7bbdd8a5a3752e0f6","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DB","submitted_at":"2011-11-30T14:12:22Z","title_canon_sha256":"30d228dd238ca6f31cb06d5138bf48063bf2553652f4f59b3c75af0880bdee06"},"schema_version":"1.0","source":{"id":"1111.7171","kind":"arxiv","version":1}},"canonical_sha256":"a0b120d925a804a03155f8ccc84fa36f484a47c00a5292cb71b4cf5bbdfa9632","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"a0b120d925a804a03155f8ccc84fa36f484a47c00a5292cb71b4cf5bbdfa9632","first_computed_at":"2026-05-18T04:07:18.577691Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T04:07:18.577691Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"0a3Nff14J2czeWRwJqtVgu7uDvdwpSq74XdZT1Tmx93aWsVJEpTPSzSnZAKKLrZe+cWuzLCrXZGW1UsGWzpVCQ==","signature_status":"signed_v1","signed_at":"2026-05-18T04:07:18.578358Z","signed_message":"canonical_sha256_bytes"},"source_id":"1111.7171","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:10b3ab863f04995f691caf3494d65cf714c9f443099a5d488ff6c1cc2c11bb02","sha256:edfeb09871b911a11c7dbc30e62dc82e983e11e8e425e2acbf6ab5e8e79800a5"],"state_sha256":"e9ffee7adc42a829fc79b05463a3c456be5fc97175d54216698530439d7ea9cf"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"QiyPvqmRgjrmLH2ac87oSTv3od7JX3KAOLZnHN9UfJjqSaBE/ZpOaq/otRTSg4Hus84qVjaI8QJTRW4TiDRBDg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-03T12:20:04.132240Z","bundle_sha256":"41cad4fe2f596d445f1d2b53f81f94f2a467f0de61c8e5beb75bf3dfc6aece63"}}