{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2018:KTJ3WJ5WEJWDU5JKZPNTO54WOP","short_pith_number":"pith:KTJ3WJ5W","schema_version":"1.0","canonical_sha256":"54d3bb27b6226c3a752acbdb37779673ff5eb42a2d6ab1022a8bba16d487cafb","source":{"kind":"arxiv","id":"1803.01384","version":2},"attestation_state":"computed","paper":{"title":"Data Curation with Deep Learning [Vision]","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.DB","authors_text":"AnHai Doan, Mourad Ouzzani, Nan Tang, Saravanan Thirumuruganathan","submitted_at":"2018-03-04T17:08:45Z","abstract_excerpt":"Data curation - the process of discovering, integrating, and cleaning data - is one of the oldest, hardest, yet inevitable data management problems. Despite decades of efforts from both researchers and practitioners, it is still one of the most time consuming and least enjoyable work of data scientists. In most organizations, data curation plays an important role so as to fully unlock the value of big data. Unfortunately, the current solutions are not keeping up with the ever-changing data ecosystem, because they often require substantially high human cost. Meanwhile, deep learning is making s"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1803.01384","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DB","submitted_at":"2018-03-04T17:08:45Z","cross_cats_sorted":[],"title_canon_sha256":"bc2be63d2b0094858cdd8f9ae8a1b45fe4a8fc27ea20cfa2368cd739a176ace1","abstract_canon_sha256":"906f46331be3fc18f45170df5f0895300738ec0c688e82677001295496c583b6"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:50:38.754385Z","signature_b64":"NRoHQY9XZ7/Cdpu0f7NVCP52B8PBPT6n3gD3nusYfDE1qSzp/5pj9IaZCohlRj0vhJKcu3SORaPvT3IpRdjbAw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"54d3bb27b6226c3a752acbdb37779673ff5eb42a2d6ab1022a8bba16d487cafb","last_reissued_at":"2026-05-17T23:50:38.753909Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:50:38.753909Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Data Curation with Deep Learning [Vision]","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.DB","authors_text":"AnHai Doan, Mourad Ouzzani, Nan Tang, Saravanan Thirumuruganathan","submitted_at":"2018-03-04T17:08:45Z","abstract_excerpt":"Data curation - the process of discovering, integrating, and cleaning data - is one of the oldest, hardest, yet inevitable data management problems. Despite decades of efforts from both researchers and practitioners, it is still one of the most time consuming and least enjoyable work of data scientists. In most organizations, data curation plays an important role so as to fully unlock the value of big data. Unfortunately, the current solutions are not keeping up with the ever-changing data ecosystem, because they often require substantially high human cost. Meanwhile, deep learning is making s"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1803.01384","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1803.01384","created_at":"2026-05-17T23:50:38.753990+00:00"},{"alias_kind":"arxiv_version","alias_value":"1803.01384v2","created_at":"2026-05-17T23:50:38.753990+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1803.01384","created_at":"2026-05-17T23:50:38.753990+00:00"},{"alias_kind":"pith_short_12","alias_value":"KTJ3WJ5WEJWD","created_at":"2026-05-18T12:32:33.847187+00:00"},{"alias_kind":"pith_short_16","alias_value":"KTJ3WJ5WEJWDU5JK","created_at":"2026-05-18T12:32:33.847187+00:00"},{"alias_kind":"pith_short_8","alias_value":"KTJ3WJ5W","created_at":"2026-05-18T12:32:33.847187+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/KTJ3WJ5WEJWDU5JKZPNTO54WOP","json":"https://pith.science/pith/KTJ3WJ5WEJWDU5JKZPNTO54WOP.json","graph_json":"https://pith.science/api/pith-number/KTJ3WJ5WEJWDU5JKZPNTO54WOP/graph.json","events_json":"https://pith.science/api/pith-number/KTJ3WJ5WEJWDU5JKZPNTO54WOP/events.json","paper":"https://pith.science/paper/KTJ3WJ5W"},"agent_actions":{"view_html":"https://pith.science/pith/KTJ3WJ5WEJWDU5JKZPNTO54WOP","download_json":"https://pith.science/pith/KTJ3WJ5WEJWDU5JKZPNTO54WOP.json","view_paper":"https://pith.science/paper/KTJ3WJ5W","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1803.01384&json=true","fetch_graph":"https://pith.science/api/pith-number/KTJ3WJ5WEJWDU5JKZPNTO54WOP/graph.json","fetch_events":"https://pith.science/api/pith-number/KTJ3WJ5WEJWDU5JKZPNTO54WOP/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/KTJ3WJ5WEJWDU5JKZPNTO54WOP/action/timestamp_anchor","attest_storage":"https://pith.science/pith/KTJ3WJ5WEJWDU5JKZPNTO54WOP/action/storage_attestation","attest_author":"https://pith.science/pith/KTJ3WJ5WEJWDU5JKZPNTO54WOP/action/author_attestation","sign_citation":"https://pith.science/pith/KTJ3WJ5WEJWDU5JKZPNTO54WOP/action/citation_signature","submit_replication":"https://pith.science/pith/KTJ3WJ5WEJWDU5JKZPNTO54WOP/action/replication_record"}},"created_at":"2026-05-17T23:50:38.753990+00:00","updated_at":"2026-05-17T23:50:38.753990+00:00"}