{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2019:Z7SRJ3QIM3EJBL6B2PG4IAZMMF","short_pith_number":"pith:Z7SRJ3QI","schema_version":"1.0","canonical_sha256":"cfe514ee0866c890afc1d3cdc4032c6155a1c56bb7e09bdb92b054e1365bf225","source":{"kind":"arxiv","id":"1903.09238","version":1},"attestation_state":"computed","paper":{"title":"Scalable Similarity Joins of Tokenized Strings","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.DB"],"primary_cat":"cs.IR","authors_text":"Ahmed Metwally, Chun-Heng Huang","submitted_at":"2019-03-21T21:16:28Z","abstract_excerpt":"This work tackles the problem of fuzzy joining of strings that naturally tokenize into meaningful substrings, e.g., full names. Tokenized-string joins have several established applications in the context of data integration and cleaning. This work is primarily motivated by fraud detection, where attackers slightly modify tokenized strings, e.g., names on accounts, to create numerous identities that she can use to defraud service providers, e.g., Google, and LinkedIn. To detect such attacks, all the accounts are pair-wise compared, and the resulting similar accounts are considered suspicious an"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1903.09238","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.IR","submitted_at":"2019-03-21T21:16:28Z","cross_cats_sorted":["cs.DB"],"title_canon_sha256":"5b1f9010e7d1a9e580ff4cf07ec3143e11aed7c3340198caea14fc481d3d5404","abstract_canon_sha256":"feaa677c532ed2865dfc91d99147c981abc0c3e1386b5d92b8f3499c95f291c5"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:50:40.253860Z","signature_b64":"3gpD2vJ4M6k4JXoGreAjto1JpOGhsXzA7kcNbJzB0CW0Yo5RG2mz4DjGhmXpQ5RBuBFz1zRlj/m5L2QYPS6qDg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"cfe514ee0866c890afc1d3cdc4032c6155a1c56bb7e09bdb92b054e1365bf225","last_reissued_at":"2026-05-17T23:50:40.253266Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:50:40.253266Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Scalable Similarity Joins of Tokenized Strings","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.DB"],"primary_cat":"cs.IR","authors_text":"Ahmed Metwally, Chun-Heng Huang","submitted_at":"2019-03-21T21:16:28Z","abstract_excerpt":"This work tackles the problem of fuzzy joining of strings that naturally tokenize into meaningful substrings, e.g., full names. Tokenized-string joins have several established applications in the context of data integration and cleaning. This work is primarily motivated by fraud detection, where attackers slightly modify tokenized strings, e.g., names on accounts, to create numerous identities that she can use to defraud service providers, e.g., Google, and LinkedIn. To detect such attacks, all the accounts are pair-wise compared, and the resulting similar accounts are considered suspicious an"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1903.09238","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1903.09238","created_at":"2026-05-17T23:50:40.253365+00:00"},{"alias_kind":"arxiv_version","alias_value":"1903.09238v1","created_at":"2026-05-17T23:50:40.253365+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1903.09238","created_at":"2026-05-17T23:50:40.253365+00:00"},{"alias_kind":"pith_short_12","alias_value":"Z7SRJ3QIM3EJ","created_at":"2026-05-18T12:33:33.725879+00:00"},{"alias_kind":"pith_short_16","alias_value":"Z7SRJ3QIM3EJBL6B","created_at":"2026-05-18T12:33:33.725879+00:00"},{"alias_kind":"pith_short_8","alias_value":"Z7SRJ3QI","created_at":"2026-05-18T12:33:33.725879+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/Z7SRJ3QIM3EJBL6B2PG4IAZMMF","json":"https://pith.science/pith/Z7SRJ3QIM3EJBL6B2PG4IAZMMF.json","graph_json":"https://pith.science/api/pith-number/Z7SRJ3QIM3EJBL6B2PG4IAZMMF/graph.json","events_json":"https://pith.science/api/pith-number/Z7SRJ3QIM3EJBL6B2PG4IAZMMF/events.json","paper":"https://pith.science/paper/Z7SRJ3QI"},"agent_actions":{"view_html":"https://pith.science/pith/Z7SRJ3QIM3EJBL6B2PG4IAZMMF","download_json":"https://pith.science/pith/Z7SRJ3QIM3EJBL6B2PG4IAZMMF.json","view_paper":"https://pith.science/paper/Z7SRJ3QI","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1903.09238&json=true","fetch_graph":"https://pith.science/api/pith-number/Z7SRJ3QIM3EJBL6B2PG4IAZMMF/graph.json","fetch_events":"https://pith.science/api/pith-number/Z7SRJ3QIM3EJBL6B2PG4IAZMMF/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/Z7SRJ3QIM3EJBL6B2PG4IAZMMF/action/timestamp_anchor","attest_storage":"https://pith.science/pith/Z7SRJ3QIM3EJBL6B2PG4IAZMMF/action/storage_attestation","attest_author":"https://pith.science/pith/Z7SRJ3QIM3EJBL6B2PG4IAZMMF/action/author_attestation","sign_citation":"https://pith.science/pith/Z7SRJ3QIM3EJBL6B2PG4IAZMMF/action/citation_signature","submit_replication":"https://pith.science/pith/Z7SRJ3QIM3EJBL6B2PG4IAZMMF/action/replication_record"}},"created_at":"2026-05-17T23:50:40.253365+00:00","updated_at":"2026-05-17T23:50:40.253365+00:00"}