{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2016:OCZNSSP2ZJYJF7REC6PLBQPJRB","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"838e662ffc20ed8713a390a36272b5fc43309f5e11368d2c8d25c59a0f8466b4","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DB","submitted_at":"2016-06-17T23:38:51Z","title_canon_sha256":"f22cae863746ca391440f839c03401db5b45fcfdb385167cf9a8b8c3d053efe7"},"schema_version":"1.0","source":{"id":"1606.05708","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1606.05708","created_at":"2026-05-18T01:12:16Z"},{"alias_kind":"arxiv_version","alias_value":"1606.05708v1","created_at":"2026-05-18T01:12:16Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1606.05708","created_at":"2026-05-18T01:12:16Z"},{"alias_kind":"pith_short_12","alias_value":"OCZNSSP2ZJYJ","created_at":"2026-05-18T12:30:36Z"},{"alias_kind":"pith_short_16","alias_value":"OCZNSSP2ZJYJF7RE","created_at":"2026-05-18T12:30:36Z"},{"alias_kind":"pith_short_8","alias_value":"OCZNSSP2","created_at":"2026-05-18T12:30:36Z"}],"graph_snapshots":[{"event_id":"sha256:7ea1736ae8d8e076167883f2e957f21f59d64367830b472aa7a35843c5f07f3c","target":"graph","created_at":"2026-05-18T01:12:16Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Visual analytics systems such as Tableau are increasingly popular for interactive data exploration. These tools, however, do not currently assist users with detecting or resolving potential data quality problems including the well-known deduplication problem. Recent approaches for deduplication focus on cleaning entire datasets and commonly require hundreds to thousands of user labels. In this paper, we address the problem of deduplication in the context of visual data analytics. We present a new approach for record deduplication that strives to produce the cleanest view possible with a limite","authors_text":"Dan Grossman, Hannaneh Hajishirzi, Kristi Morton, Magdalena Balazinska","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DB","submitted_at":"2016-06-17T23:38:51Z","title":"View-Driven Deduplication with Active Learning"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1606.05708","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:b65104963651c3b7916ff283317b372371c5d9271a7c67254ca6bd192453ab0f","target":"record","created_at":"2026-05-18T01:12:16Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"838e662ffc20ed8713a390a36272b5fc43309f5e11368d2c8d25c59a0f8466b4","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DB","submitted_at":"2016-06-17T23:38:51Z","title_canon_sha256":"f22cae863746ca391440f839c03401db5b45fcfdb385167cf9a8b8c3d053efe7"},"schema_version":"1.0","source":{"id":"1606.05708","kind":"arxiv","version":1}},"canonical_sha256":"70b2d949faca7092fe24179eb0c1e9886cd595283ba04554e48383b6ae36f0de","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"70b2d949faca7092fe24179eb0c1e9886cd595283ba04554e48383b6ae36f0de","first_computed_at":"2026-05-18T01:12:16.211389Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T01:12:16.211389Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"dzyQFmXuw8dd8KORik29pm96OhRo6WgIIy75CSGAMcw1eNQNuz3xmyeHRpPJ8FaIBrbmTSAEmlecrtUp7fAJCg==","signature_status":"signed_v1","signed_at":"2026-05-18T01:12:16.211776Z","signed_message":"canonical_sha256_bytes"},"source_id":"1606.05708","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:b65104963651c3b7916ff283317b372371c5d9271a7c67254ca6bd192453ab0f","sha256:7ea1736ae8d8e076167883f2e957f21f59d64367830b472aa7a35843c5f07f3c"],"state_sha256":"36eb48587fc43b87480b151ad1a6fbf774e0aeadc6a12919c803198b0eaf43f8"}