{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2017:LGMCD5IXV5Z7WP23U73TOLBS44","short_pith_number":"pith:LGMCD5IX","canonical_record":{"source":{"id":"1709.10436","kind":"arxiv","version":4},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DB","submitted_at":"2017-09-29T14:48:56Z","cross_cats_sorted":[],"title_canon_sha256":"31f7db6c3a824623da26fa4a1e5063364db05ab7e3e14ee95cd28f9fb0fc40ef","abstract_canon_sha256":"38caf79ffe30188696de398865d5a6c64a375e5d0ff702a6dbd2b36cdd151218"},"schema_version":"1.0"},"canonical_sha256":"599821f517af73fb3f5ba7f7372c32e710d45e62d2249659ff5f98429ed02e44","source":{"kind":"arxiv","id":"1709.10436","version":4},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1709.10436","created_at":"2026-05-18T00:09:36Z"},{"alias_kind":"arxiv_version","alias_value":"1709.10436v4","created_at":"2026-05-18T00:09:36Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1709.10436","created_at":"2026-05-18T00:09:36Z"},{"alias_kind":"pith_short_12","alias_value":"LGMCD5IXV5Z7","created_at":"2026-05-18T12:31:28Z"},{"alias_kind":"pith_short_16","alias_value":"LGMCD5IXV5Z7WP23","created_at":"2026-05-18T12:31:28Z"},{"alias_kind":"pith_short_8","alias_value":"LGMCD5IX","created_at":"2026-05-18T12:31:28Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2017:LGMCD5IXV5Z7WP23U73TOLBS44","target":"record","payload":{"canonical_record":{"source":{"id":"1709.10436","kind":"arxiv","version":4},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DB","submitted_at":"2017-09-29T14:48:56Z","cross_cats_sorted":[],"title_canon_sha256":"31f7db6c3a824623da26fa4a1e5063364db05ab7e3e14ee95cd28f9fb0fc40ef","abstract_canon_sha256":"38caf79ffe30188696de398865d5a6c64a375e5d0ff702a6dbd2b36cdd151218"},"schema_version":"1.0"},"canonical_sha256":"599821f517af73fb3f5ba7f7372c32e710d45e62d2249659ff5f98429ed02e44","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:09:36.429573Z","signature_b64":"Fo0D3UJXCNwJXmvKC8sFNcyEbFpDFwFzSOGAIUslLZOXdZt/BnB9D2zh7NVJUN72yS/hW+eDoCphHUv88+xJAA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"599821f517af73fb3f5ba7f7372c32e710d45e62d2249659ff5f98429ed02e44","last_reissued_at":"2026-05-18T00:09:36.429108Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:09:36.429108Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1709.10436","source_version":4,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:09:36Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"3JzyNiQeoaLV+9Mf4JSHAnqezsqMAV8/cgInlEgX3OLI48TmVSw2EMx0ysb5d1uWktIFXbLaOBs2Puw8GVuZAQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-03T06:04:47.230967Z"},"content_sha256":"735996e260146596723a06456ef4f2d5d336fa38e55f2b3a683e6d3a6c98aad2","schema_version":"1.0","event_id":"sha256:735996e260146596723a06456ef4f2d5d336fa38e55f2b3a683e6d3a6c98aad2"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2017:LGMCD5IXV5Z7WP23U73TOLBS44","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Unsupervised String Transformation Learning for Entity Consolidation","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.DB","authors_text":"Ahmed Elmagarmid, Dong Deng, Guoliang Li, Ihab F. Ilyas, Michael Stonebraker, Mourad Ouzzani, Nan Tang, Samuel Madden, Wenbo Tao, Ziawasch Abedjan","submitted_at":"2017-09-29T14:48:56Z","abstract_excerpt":"Data integration has been a long-standing challenge in data management with many applications. A key step in data integration is entity consolidation. It takes a collection of clusters of duplicate records as input and produces a single \"golden record\" for each cluster, which contains the canonical value for each attribute. Truth discovery and data fusion methods, as well as Master Data Management (MDM) systems, can be used for entity consolidation. However, to achieve better results, the variant values (i.e., values that are logically the same with different formats) in the clusters need to b"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1709.10436","kind":"arxiv","version":4},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:09:36Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"pShxLPzP0If6+YMeLBqUwNKYvZTPlRjL4RdyQjtZkKXJo/ctMvDDd2kEf1yb7JjmuL4LbJql7ClgAjfXhuySBg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-03T06:04:47.231311Z"},"content_sha256":"94612a5a5ee39859835f1668472b77de45e19af8062ab54075ccfff570a4f533","schema_version":"1.0","event_id":"sha256:94612a5a5ee39859835f1668472b77de45e19af8062ab54075ccfff570a4f533"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/LGMCD5IXV5Z7WP23U73TOLBS44/bundle.json","state_url":"https://pith.science/pith/LGMCD5IXV5Z7WP23U73TOLBS44/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/LGMCD5IXV5Z7WP23U73TOLBS44/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-03T06:04:47Z","links":{"resolver":"https://pith.science/pith/LGMCD5IXV5Z7WP23U73TOLBS44","bundle":"https://pith.science/pith/LGMCD5IXV5Z7WP23U73TOLBS44/bundle.json","state":"https://pith.science/pith/LGMCD5IXV5Z7WP23U73TOLBS44/state.json","well_known_bundle":"https://pith.science/.well-known/pith/LGMCD5IXV5Z7WP23U73TOLBS44/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2017:LGMCD5IXV5Z7WP23U73TOLBS44","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"38caf79ffe30188696de398865d5a6c64a375e5d0ff702a6dbd2b36cdd151218","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DB","submitted_at":"2017-09-29T14:48:56Z","title_canon_sha256":"31f7db6c3a824623da26fa4a1e5063364db05ab7e3e14ee95cd28f9fb0fc40ef"},"schema_version":"1.0","source":{"id":"1709.10436","kind":"arxiv","version":4}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1709.10436","created_at":"2026-05-18T00:09:36Z"},{"alias_kind":"arxiv_version","alias_value":"1709.10436v4","created_at":"2026-05-18T00:09:36Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1709.10436","created_at":"2026-05-18T00:09:36Z"},{"alias_kind":"pith_short_12","alias_value":"LGMCD5IXV5Z7","created_at":"2026-05-18T12:31:28Z"},{"alias_kind":"pith_short_16","alias_value":"LGMCD5IXV5Z7WP23","created_at":"2026-05-18T12:31:28Z"},{"alias_kind":"pith_short_8","alias_value":"LGMCD5IX","created_at":"2026-05-18T12:31:28Z"}],"graph_snapshots":[{"event_id":"sha256:94612a5a5ee39859835f1668472b77de45e19af8062ab54075ccfff570a4f533","target":"graph","created_at":"2026-05-18T00:09:36Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Data integration has been a long-standing challenge in data management with many applications. A key step in data integration is entity consolidation. It takes a collection of clusters of duplicate records as input and produces a single \"golden record\" for each cluster, which contains the canonical value for each attribute. Truth discovery and data fusion methods, as well as Master Data Management (MDM) systems, can be used for entity consolidation. However, to achieve better results, the variant values (i.e., values that are logically the same with different formats) in the clusters need to b","authors_text":"Ahmed Elmagarmid, Dong Deng, Guoliang Li, Ihab F. Ilyas, Michael Stonebraker, Mourad Ouzzani, Nan Tang, Samuel Madden, Wenbo Tao, Ziawasch Abedjan","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DB","submitted_at":"2017-09-29T14:48:56Z","title":"Unsupervised String Transformation Learning for Entity Consolidation"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1709.10436","kind":"arxiv","version":4},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:735996e260146596723a06456ef4f2d5d336fa38e55f2b3a683e6d3a6c98aad2","target":"record","created_at":"2026-05-18T00:09:36Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"38caf79ffe30188696de398865d5a6c64a375e5d0ff702a6dbd2b36cdd151218","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DB","submitted_at":"2017-09-29T14:48:56Z","title_canon_sha256":"31f7db6c3a824623da26fa4a1e5063364db05ab7e3e14ee95cd28f9fb0fc40ef"},"schema_version":"1.0","source":{"id":"1709.10436","kind":"arxiv","version":4}},"canonical_sha256":"599821f517af73fb3f5ba7f7372c32e710d45e62d2249659ff5f98429ed02e44","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"599821f517af73fb3f5ba7f7372c32e710d45e62d2249659ff5f98429ed02e44","first_computed_at":"2026-05-18T00:09:36.429108Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:09:36.429108Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"Fo0D3UJXCNwJXmvKC8sFNcyEbFpDFwFzSOGAIUslLZOXdZt/BnB9D2zh7NVJUN72yS/hW+eDoCphHUv88+xJAA==","signature_status":"signed_v1","signed_at":"2026-05-18T00:09:36.429573Z","signed_message":"canonical_sha256_bytes"},"source_id":"1709.10436","source_kind":"arxiv","source_version":4}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:735996e260146596723a06456ef4f2d5d336fa38e55f2b3a683e6d3a6c98aad2","sha256:94612a5a5ee39859835f1668472b77de45e19af8062ab54075ccfff570a4f533"],"state_sha256":"1cadbcea6926233d9783c0996a2dc724e4b36e8d1d3216e3a9168f47d7d1c2c6"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"WO4mRlyGLozPtcbwb8/5Q7EW5OO5MWVPGJ5S7ID56Ngncz9RID8TbUcAdT3CEjwZ/2HVYgetQHV2iEa/dvESCA==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-03T06:04:47.233289Z","bundle_sha256":"031b29ca237762d8bc5c90197b60efc4a677f858c973f9074e5cd3be0728ed41"}}