{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2012:5WFYX33TUKN7J6M4DJQZCLWNAW","short_pith_number":"pith:5WFYX33T","canonical_record":{"source":{"id":"1204.3677","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DB","submitted_at":"2012-04-17T00:59:53Z","cross_cats_sorted":["cs.IR"],"title_canon_sha256":"0695937aad7869880a1c6949af76b7719b5f36269109d92e32f9984367ca91ea","abstract_canon_sha256":"882c91a19814dcdb2c425d400e7e61ef280c562077d75c4e2cfda1437ef1a95f"},"schema_version":"1.0"},"canonical_sha256":"ed8b8bef73a29bf4f99c1a61912ecd05ba105c902814778bb27b032321e9e849","source":{"kind":"arxiv","id":"1204.3677","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1204.3677","created_at":"2026-05-18T03:57:39Z"},{"alias_kind":"arxiv_version","alias_value":"1204.3677v1","created_at":"2026-05-18T03:57:39Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1204.3677","created_at":"2026-05-18T03:57:39Z"},{"alias_kind":"pith_short_12","alias_value":"5WFYX33TUKN7","created_at":"2026-05-18T12:26:56Z"},{"alias_kind":"pith_short_16","alias_value":"5WFYX33TUKN7J6M4","created_at":"2026-05-18T12:26:56Z"},{"alias_kind":"pith_short_8","alias_value":"5WFYX33T","created_at":"2026-05-18T12:26:56Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2012:5WFYX33TUKN7J6M4DJQZCLWNAW","target":"record","payload":{"canonical_record":{"source":{"id":"1204.3677","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DB","submitted_at":"2012-04-17T00:59:53Z","cross_cats_sorted":["cs.IR"],"title_canon_sha256":"0695937aad7869880a1c6949af76b7719b5f36269109d92e32f9984367ca91ea","abstract_canon_sha256":"882c91a19814dcdb2c425d400e7e61ef280c562077d75c4e2cfda1437ef1a95f"},"schema_version":"1.0"},"canonical_sha256":"ed8b8bef73a29bf4f99c1a61912ecd05ba105c902814778bb27b032321e9e849","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T03:57:39.960025Z","signature_b64":"XPX7hsAVe9NEgAHZ22hg8n09wvh/IDSklngqEy5xnziGuDpU2TuV4qYkuawLBHJ1uspf2pSQQ7EyKMIqK/2RBA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"ed8b8bef73a29bf4f99c1a61912ecd05ba105c902814778bb27b032321e9e849","last_reissued_at":"2026-05-18T03:57:39.959409Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T03:57:39.959409Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1204.3677","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T03:57:39Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"x2Q2J38KGPRMba5KOyihkgo0dFDetPWCVKq84vOqg2iRLeOzINBuUFdvLS1AhSXhWjXdmXZ9onjYYV3J0KvAAA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-28T22:05:48.472589Z"},"content_sha256":"4b2fb85902801c31e09ab4dc871071c2333f7179f3fe64a32d7797525aa1bdbd","schema_version":"1.0","event_id":"sha256:4b2fb85902801c31e09ab4dc871071c2333f7179f3fe64a32d7797525aa1bdbd"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2012:5WFYX33TUKN7J6M4DJQZCLWNAW","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Bayesian Data Cleaning for Web Data","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.IR"],"primary_cat":"cs.DB","authors_text":"Subbarao Kambhampati, Sushovan De, Yi Chen, Yuheng Hu","submitted_at":"2012-04-17T00:59:53Z","abstract_excerpt":"Data Cleaning is a long standing problem, which is growing in importance with the mass of uncurated web data. State of the art approaches for handling inconsistent data are systems that learn and use conditional functional dependencies (CFDs) to rectify data. These methods learn data patterns--CFDs--from a clean sample of the data and use them to rectify the dirty/inconsistent data. While getting a clean training sample is feasible in enterprise data scenarios, it is infeasible in web databases where there is no separate curated data. CFD based methods are unfortunately particularly sensitive "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1204.3677","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T03:57:39Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"Mj/3wfLwx5hMchMNAzP+fl5yXbCepMcDVkOhZVFRdAql8ZMxd2WKD4S3YGUJeh+BfahhP1SINqPX74j5RCOWCA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-28T22:05:48.473294Z"},"content_sha256":"6950b58fc394c534f76616cb93fda3698b6a89b244269b3fc3d973a8c656bb3e","schema_version":"1.0","event_id":"sha256:6950b58fc394c534f76616cb93fda3698b6a89b244269b3fc3d973a8c656bb3e"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/5WFYX33TUKN7J6M4DJQZCLWNAW/bundle.json","state_url":"https://pith.science/pith/5WFYX33TUKN7J6M4DJQZCLWNAW/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/5WFYX33TUKN7J6M4DJQZCLWNAW/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-28T22:05:48Z","links":{"resolver":"https://pith.science/pith/5WFYX33TUKN7J6M4DJQZCLWNAW","bundle":"https://pith.science/pith/5WFYX33TUKN7J6M4DJQZCLWNAW/bundle.json","state":"https://pith.science/pith/5WFYX33TUKN7J6M4DJQZCLWNAW/state.json","well_known_bundle":"https://pith.science/.well-known/pith/5WFYX33TUKN7J6M4DJQZCLWNAW/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2012:5WFYX33TUKN7J6M4DJQZCLWNAW","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"882c91a19814dcdb2c425d400e7e61ef280c562077d75c4e2cfda1437ef1a95f","cross_cats_sorted":["cs.IR"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DB","submitted_at":"2012-04-17T00:59:53Z","title_canon_sha256":"0695937aad7869880a1c6949af76b7719b5f36269109d92e32f9984367ca91ea"},"schema_version":"1.0","source":{"id":"1204.3677","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1204.3677","created_at":"2026-05-18T03:57:39Z"},{"alias_kind":"arxiv_version","alias_value":"1204.3677v1","created_at":"2026-05-18T03:57:39Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1204.3677","created_at":"2026-05-18T03:57:39Z"},{"alias_kind":"pith_short_12","alias_value":"5WFYX33TUKN7","created_at":"2026-05-18T12:26:56Z"},{"alias_kind":"pith_short_16","alias_value":"5WFYX33TUKN7J6M4","created_at":"2026-05-18T12:26:56Z"},{"alias_kind":"pith_short_8","alias_value":"5WFYX33T","created_at":"2026-05-18T12:26:56Z"}],"graph_snapshots":[{"event_id":"sha256:6950b58fc394c534f76616cb93fda3698b6a89b244269b3fc3d973a8c656bb3e","target":"graph","created_at":"2026-05-18T03:57:39Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Data Cleaning is a long standing problem, which is growing in importance with the mass of uncurated web data. State of the art approaches for handling inconsistent data are systems that learn and use conditional functional dependencies (CFDs) to rectify data. These methods learn data patterns--CFDs--from a clean sample of the data and use them to rectify the dirty/inconsistent data. While getting a clean training sample is feasible in enterprise data scenarios, it is infeasible in web databases where there is no separate curated data. CFD based methods are unfortunately particularly sensitive ","authors_text":"Subbarao Kambhampati, Sushovan De, Yi Chen, Yuheng Hu","cross_cats":["cs.IR"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DB","submitted_at":"2012-04-17T00:59:53Z","title":"Bayesian Data Cleaning for Web Data"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1204.3677","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:4b2fb85902801c31e09ab4dc871071c2333f7179f3fe64a32d7797525aa1bdbd","target":"record","created_at":"2026-05-18T03:57:39Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"882c91a19814dcdb2c425d400e7e61ef280c562077d75c4e2cfda1437ef1a95f","cross_cats_sorted":["cs.IR"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DB","submitted_at":"2012-04-17T00:59:53Z","title_canon_sha256":"0695937aad7869880a1c6949af76b7719b5f36269109d92e32f9984367ca91ea"},"schema_version":"1.0","source":{"id":"1204.3677","kind":"arxiv","version":1}},"canonical_sha256":"ed8b8bef73a29bf4f99c1a61912ecd05ba105c902814778bb27b032321e9e849","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"ed8b8bef73a29bf4f99c1a61912ecd05ba105c902814778bb27b032321e9e849","first_computed_at":"2026-05-18T03:57:39.959409Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T03:57:39.959409Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"XPX7hsAVe9NEgAHZ22hg8n09wvh/IDSklngqEy5xnziGuDpU2TuV4qYkuawLBHJ1uspf2pSQQ7EyKMIqK/2RBA==","signature_status":"signed_v1","signed_at":"2026-05-18T03:57:39.960025Z","signed_message":"canonical_sha256_bytes"},"source_id":"1204.3677","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:4b2fb85902801c31e09ab4dc871071c2333f7179f3fe64a32d7797525aa1bdbd","sha256:6950b58fc394c534f76616cb93fda3698b6a89b244269b3fc3d973a8c656bb3e"],"state_sha256":"90c38d3cce95d1964ca53acc2557fa4ac0c0c45cdca32fc11f60874926493efd"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"B1CkHiwxccAf4s9TJJQF26+LT/ONT6Mgm6yKNYTEIrwkiJOEQlP+j07O+NMJl0VshhwAnBYxgB9R7W4A/BntBg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-28T22:05:48.477102Z","bundle_sha256":"0dbc48da6cd2758d384ad5b8a5c867c0e0534bf4f6b02a4dcfb86d8b21315cf8"}}