{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2024:6JLFMBLBNLWNZTPSFXAFPKMAVR","short_pith_number":"pith:6JLFMBLB","canonical_record":{"source":{"id":"2403.17344","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DB","submitted_at":"2024-03-26T03:07:32Z","cross_cats_sorted":["cs.CL"],"title_canon_sha256":"6807148e0c75ef4e4cadf5629922b634a6154a86281ab6f7893756523f23f07b","abstract_canon_sha256":"374bbfd4e183cf96012c5f52c666a33c47a84902d48494a4eae85dd893b5007f"},"schema_version":"1.0"},"canonical_sha256":"f2565605616aecdccdf22dc057a980ac76199373f867835aeb4530760d46f780","source":{"kind":"arxiv","id":"2403.17344","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2403.17344","created_at":"2026-07-05T08:24:41Z"},{"alias_kind":"arxiv_version","alias_value":"2403.17344v2","created_at":"2026-07-05T08:24:41Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2403.17344","created_at":"2026-07-05T08:24:41Z"},{"alias_kind":"pith_short_12","alias_value":"6JLFMBLBNLWN","created_at":"2026-07-05T08:24:41Z"},{"alias_kind":"pith_short_16","alias_value":"6JLFMBLBNLWNZTPS","created_at":"2026-07-05T08:24:41Z"},{"alias_kind":"pith_short_8","alias_value":"6JLFMBLB","created_at":"2026-07-05T08:24:41Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2024:6JLFMBLBNLWNZTPSFXAFPKMAVR","target":"record","payload":{"canonical_record":{"source":{"id":"2403.17344","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DB","submitted_at":"2024-03-26T03:07:32Z","cross_cats_sorted":["cs.CL"],"title_canon_sha256":"6807148e0c75ef4e4cadf5629922b634a6154a86281ab6f7893756523f23f07b","abstract_canon_sha256":"374bbfd4e183cf96012c5f52c666a33c47a84902d48494a4eae85dd893b5007f"},"schema_version":"1.0"},"canonical_sha256":"f2565605616aecdccdf22dc057a980ac76199373f867835aeb4530760d46f780","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-07-05T08:24:41.752672Z","signature_b64":"ZwYmI8vFbBWHX4cO/apKhsRuHiG29Ono0G+5F+thqqfyrHDOyCcYPWvGEaX1sZNDTEZU8/HCIly6XLWLUwPKCA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"f2565605616aecdccdf22dc057a980ac76199373f867835aeb4530760d46f780","last_reissued_at":"2026-07-05T08:24:41.752182Z","signature_status":"signed_v1","first_computed_at":"2026-07-05T08:24:41.752182Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2403.17344","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-07-05T08:24:41Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"a3LvAFvp9ASPM6YlekwZQbW8tCgAqxRJYg5Ww5FZ5oOpcRGuE3s79I1vBTS5R+ut0rRYi2QzSzRg/eiIIuTADw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-07-05T15:53:54.187166Z"},"content_sha256":"4c73516146b5f84dd02ec97110da267ea09527321c38f5e077d0b5e0da064081","schema_version":"1.0","event_id":"sha256:4c73516146b5f84dd02ec97110da267ea09527321c38f5e077d0b5e0da064081"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2024:6JLFMBLBNLWNZTPSFXAFPKMAVR","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Disambiguate Entity Matching using Large Language Models through Relation Discovery","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.CL"],"primary_cat":"cs.DB","authors_text":"Zezhou Huang","submitted_at":"2024-03-26T03:07:32Z","abstract_excerpt":"Entity matching is a critical challenge in data integration and cleaning, central to tasks like fuzzy joins and deduplication. Traditional approaches have focused on overcoming fuzzy term representations through methods such as edit distance, Jaccard similarity, and more recently, embeddings and deep neural networks, including advancements from large language models (LLMs) like GPT. However, the core challenge in entity matching extends beyond term fuzziness to the ambiguity in defining what constitutes a \"match,\" especially when integrating with external databases. This ambiguity arises due t"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2403.17344","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2403.17344/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-07-05T08:24:41Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"VtlddoS8d2jMwp+xy/8IHaXYSSX7Be8uVe4oIosPH1heJJRTB73poDLdKho8D/J5fnSL9ri+yxFdxEbwft4RCg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-07-05T15:53:54.187561Z"},"content_sha256":"838fdb4ca9615ee0813df459beb5f148e3aa2803c18785d90fa4456fb01af7d0","schema_version":"1.0","event_id":"sha256:838fdb4ca9615ee0813df459beb5f148e3aa2803c18785d90fa4456fb01af7d0"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/6JLFMBLBNLWNZTPSFXAFPKMAVR/bundle.json","state_url":"https://pith.science/pith/6JLFMBLBNLWNZTPSFXAFPKMAVR/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/6JLFMBLBNLWNZTPSFXAFPKMAVR/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-07-05T15:53:54Z","links":{"resolver":"https://pith.science/pith/6JLFMBLBNLWNZTPSFXAFPKMAVR","bundle":"https://pith.science/pith/6JLFMBLBNLWNZTPSFXAFPKMAVR/bundle.json","state":"https://pith.science/pith/6JLFMBLBNLWNZTPSFXAFPKMAVR/state.json","well_known_bundle":"https://pith.science/.well-known/pith/6JLFMBLBNLWNZTPSFXAFPKMAVR/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2024:6JLFMBLBNLWNZTPSFXAFPKMAVR","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"374bbfd4e183cf96012c5f52c666a33c47a84902d48494a4eae85dd893b5007f","cross_cats_sorted":["cs.CL"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DB","submitted_at":"2024-03-26T03:07:32Z","title_canon_sha256":"6807148e0c75ef4e4cadf5629922b634a6154a86281ab6f7893756523f23f07b"},"schema_version":"1.0","source":{"id":"2403.17344","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2403.17344","created_at":"2026-07-05T08:24:41Z"},{"alias_kind":"arxiv_version","alias_value":"2403.17344v2","created_at":"2026-07-05T08:24:41Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2403.17344","created_at":"2026-07-05T08:24:41Z"},{"alias_kind":"pith_short_12","alias_value":"6JLFMBLBNLWN","created_at":"2026-07-05T08:24:41Z"},{"alias_kind":"pith_short_16","alias_value":"6JLFMBLBNLWNZTPS","created_at":"2026-07-05T08:24:41Z"},{"alias_kind":"pith_short_8","alias_value":"6JLFMBLB","created_at":"2026-07-05T08:24:41Z"}],"graph_snapshots":[{"event_id":"sha256:838fdb4ca9615ee0813df459beb5f148e3aa2803c18785d90fa4456fb01af7d0","target":"graph","created_at":"2026-07-05T08:24:41Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2403.17344/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Entity matching is a critical challenge in data integration and cleaning, central to tasks like fuzzy joins and deduplication. Traditional approaches have focused on overcoming fuzzy term representations through methods such as edit distance, Jaccard similarity, and more recently, embeddings and deep neural networks, including advancements from large language models (LLMs) like GPT. However, the core challenge in entity matching extends beyond term fuzziness to the ambiguity in defining what constitutes a \"match,\" especially when integrating with external databases. This ambiguity arises due t","authors_text":"Zezhou Huang","cross_cats":["cs.CL"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DB","submitted_at":"2024-03-26T03:07:32Z","title":"Disambiguate Entity Matching using Large Language Models through Relation Discovery"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2403.17344","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:4c73516146b5f84dd02ec97110da267ea09527321c38f5e077d0b5e0da064081","target":"record","created_at":"2026-07-05T08:24:41Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"374bbfd4e183cf96012c5f52c666a33c47a84902d48494a4eae85dd893b5007f","cross_cats_sorted":["cs.CL"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DB","submitted_at":"2024-03-26T03:07:32Z","title_canon_sha256":"6807148e0c75ef4e4cadf5629922b634a6154a86281ab6f7893756523f23f07b"},"schema_version":"1.0","source":{"id":"2403.17344","kind":"arxiv","version":2}},"canonical_sha256":"f2565605616aecdccdf22dc057a980ac76199373f867835aeb4530760d46f780","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"f2565605616aecdccdf22dc057a980ac76199373f867835aeb4530760d46f780","first_computed_at":"2026-07-05T08:24:41.752182Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-07-05T08:24:41.752182Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"ZwYmI8vFbBWHX4cO/apKhsRuHiG29Ono0G+5F+thqqfyrHDOyCcYPWvGEaX1sZNDTEZU8/HCIly6XLWLUwPKCA==","signature_status":"signed_v1","signed_at":"2026-07-05T08:24:41.752672Z","signed_message":"canonical_sha256_bytes"},"source_id":"2403.17344","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:4c73516146b5f84dd02ec97110da267ea09527321c38f5e077d0b5e0da064081","sha256:838fdb4ca9615ee0813df459beb5f148e3aa2803c18785d90fa4456fb01af7d0"],"state_sha256":"ad341e33460935e713a8cfc2d8b747f3dcf378479857112a0e306caa11e3d0db"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"1NszaYzzBZeTj2swHGkPKsqYTttmBFB0Ek/YH3JylaUeSd1SMmhyuyguD0TSFNrz2aWYiCA0TUFIWnFlon65BA==","signed_message":"bundle_sha256_bytes","signed_at":"2026-07-05T15:53:54.190692Z","bundle_sha256":"956a59c08e0bed72a47e67e46c02889178e3d2fc815fc671ef178afcaefea77c"}}