{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2017:PEBBRFRSGHP5W2YDPUVFLGZP3N","short_pith_number":"pith:PEBBRFRS","schema_version":"1.0","canonical_sha256":"790218963231dfdb6b037d2a559b2fdb445666b07faa082149e59dc8e1c0d885","source":{"kind":"arxiv","id":"1712.06223","version":1},"attestation_state":"computed","paper":{"title":"Error-Tolerant Big Data Processing","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.DB","authors_text":"Dong Deng","submitted_at":"2017-12-18T01:57:39Z","abstract_excerpt":"Real-world data contains various kinds of errors. Before analyzing data, one usually needs to process the raw data. However, traditional data processing based on exactly match often misses lots of valid information. To get high-quality analysis results and fit in the big data era, this thesis studies the error-tolerant big data processing. As most of the data in real world can be represented as a sequence or a set, this thesis utilizes the widely-used sequence-based and set-based similar functions to tolerate errors in data processing and studies the approximate entity extraction, similarity j"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1712.06223","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DB","submitted_at":"2017-12-18T01:57:39Z","cross_cats_sorted":[],"title_canon_sha256":"41a31f3eabc3116b2e4ed62ae19dd05b956194850162f0140d77b906e032fced","abstract_canon_sha256":"c06595c56b95ce71569f38a6be1fed07fe858e0c15d92a971d716fe0bcb015ef"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:27:50.814080Z","signature_b64":"15eval3faMRgBj8ch9kvQEaR5XR/pBewOKeZ5STbxk/2Nb3+3JThZJfYmWmJicwwioZqDcJk2N8HCM5DdrIsCQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"790218963231dfdb6b037d2a559b2fdb445666b07faa082149e59dc8e1c0d885","last_reissued_at":"2026-05-18T00:27:50.813511Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:27:50.813511Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Error-Tolerant Big Data Processing","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.DB","authors_text":"Dong Deng","submitted_at":"2017-12-18T01:57:39Z","abstract_excerpt":"Real-world data contains various kinds of errors. Before analyzing data, one usually needs to process the raw data. However, traditional data processing based on exactly match often misses lots of valid information. To get high-quality analysis results and fit in the big data era, this thesis studies the error-tolerant big data processing. As most of the data in real world can be represented as a sequence or a set, this thesis utilizes the widely-used sequence-based and set-based similar functions to tolerate errors in data processing and studies the approximate entity extraction, similarity j"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1712.06223","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1712.06223","created_at":"2026-05-18T00:27:50.813619+00:00"},{"alias_kind":"arxiv_version","alias_value":"1712.06223v1","created_at":"2026-05-18T00:27:50.813619+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1712.06223","created_at":"2026-05-18T00:27:50.813619+00:00"},{"alias_kind":"pith_short_12","alias_value":"PEBBRFRSGHP5","created_at":"2026-05-18T12:31:37.085036+00:00"},{"alias_kind":"pith_short_16","alias_value":"PEBBRFRSGHP5W2YD","created_at":"2026-05-18T12:31:37.085036+00:00"},{"alias_kind":"pith_short_8","alias_value":"PEBBRFRS","created_at":"2026-05-18T12:31:37.085036+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/PEBBRFRSGHP5W2YDPUVFLGZP3N","json":"https://pith.science/pith/PEBBRFRSGHP5W2YDPUVFLGZP3N.json","graph_json":"https://pith.science/api/pith-number/PEBBRFRSGHP5W2YDPUVFLGZP3N/graph.json","events_json":"https://pith.science/api/pith-number/PEBBRFRSGHP5W2YDPUVFLGZP3N/events.json","paper":"https://pith.science/paper/PEBBRFRS"},"agent_actions":{"view_html":"https://pith.science/pith/PEBBRFRSGHP5W2YDPUVFLGZP3N","download_json":"https://pith.science/pith/PEBBRFRSGHP5W2YDPUVFLGZP3N.json","view_paper":"https://pith.science/paper/PEBBRFRS","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1712.06223&json=true","fetch_graph":"https://pith.science/api/pith-number/PEBBRFRSGHP5W2YDPUVFLGZP3N/graph.json","fetch_events":"https://pith.science/api/pith-number/PEBBRFRSGHP5W2YDPUVFLGZP3N/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/PEBBRFRSGHP5W2YDPUVFLGZP3N/action/timestamp_anchor","attest_storage":"https://pith.science/pith/PEBBRFRSGHP5W2YDPUVFLGZP3N/action/storage_attestation","attest_author":"https://pith.science/pith/PEBBRFRSGHP5W2YDPUVFLGZP3N/action/author_attestation","sign_citation":"https://pith.science/pith/PEBBRFRSGHP5W2YDPUVFLGZP3N/action/citation_signature","submit_replication":"https://pith.science/pith/PEBBRFRSGHP5W2YDPUVFLGZP3N/action/replication_record"}},"created_at":"2026-05-18T00:27:50.813619+00:00","updated_at":"2026-05-18T00:27:50.813619+00:00"}