{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2016:5F3GBJDPE6XUSMPBREUKHD6LYZ","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"590a72bb0bdc0c2ba9731962d12cd686e95ef41adfb7ee6914f1286d7bb54e94","cross_cats_sorted":["cs.CL","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DB","submitted_at":"2016-02-25T05:49:36Z","title_canon_sha256":"f52a1645b6544bd3632a829e8450d6cfce759777f05e2c8f3ff31de7236b6cb7"},"schema_version":"1.0","source":{"id":"1602.07807","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1602.07807","created_at":"2026-05-18T01:17:24Z"},{"alias_kind":"arxiv_version","alias_value":"1602.07807v2","created_at":"2026-05-18T01:17:24Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1602.07807","created_at":"2026-05-18T01:17:24Z"},{"alias_kind":"pith_short_12","alias_value":"5F3GBJDPE6XU","created_at":"2026-05-18T12:30:01Z"},{"alias_kind":"pith_short_16","alias_value":"5F3GBJDPE6XUSMPB","created_at":"2026-05-18T12:30:01Z"},{"alias_kind":"pith_short_8","alias_value":"5F3GBJDP","created_at":"2026-05-18T12:30:01Z"}],"graph_snapshots":[{"event_id":"sha256:dda00d5fa312e2ed591398e33b4904bde35f10b352f5c46b6e965b5ba8b5613f","target":"graph","created_at":"2026-05-18T01:17:24Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Many important forms of data are stored digitally in XML format. Errors can occur in the textual content of the data in the fields of the XML. Fixing these errors manually is time-consuming and expensive, especially for large amounts of data. There is increasing interest in the research, development, and use of automated techniques for assisting with data cleaning. Electronic dictionaries are an important form of data frequently stored in XML format that frequently have errors introduced through a mixture of manual typographical entry errors and optical character recognition errors. In this pa","authors_text":"Benjamin Strauss, Michael Bloodgood","cross_cats":["cs.CL","stat.ML"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DB","submitted_at":"2016-02-25T05:49:36Z","title":"Data Cleaning for XML Electronic Dictionaries via Statistical Anomaly Detection"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1602.07807","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:123b76670dc4504ecb3e984b74375d545cb05d86caf8c7358a27a64f10e976d8","target":"record","created_at":"2026-05-18T01:17:24Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"590a72bb0bdc0c2ba9731962d12cd686e95ef41adfb7ee6914f1286d7bb54e94","cross_cats_sorted":["cs.CL","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DB","submitted_at":"2016-02-25T05:49:36Z","title_canon_sha256":"f52a1645b6544bd3632a829e8450d6cfce759777f05e2c8f3ff31de7236b6cb7"},"schema_version":"1.0","source":{"id":"1602.07807","kind":"arxiv","version":2}},"canonical_sha256":"e97660a46f27af4931e18928a38fcbc659d4fcc1e8177ea02a7c135f320772ca","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"e97660a46f27af4931e18928a38fcbc659d4fcc1e8177ea02a7c135f320772ca","first_computed_at":"2026-05-18T01:17:24.559474Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T01:17:24.559474Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"j/hd1KbazYyRRbjfUdYVPI9h/MBUaMUM/8zL+Xst5zrg/Rc7urVNP6DDy5t9mW8tO0JCdxnQMCdr7G3MKwKXCA==","signature_status":"signed_v1","signed_at":"2026-05-18T01:17:24.560147Z","signed_message":"canonical_sha256_bytes"},"source_id":"1602.07807","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:123b76670dc4504ecb3e984b74375d545cb05d86caf8c7358a27a64f10e976d8","sha256:dda00d5fa312e2ed591398e33b4904bde35f10b352f5c46b6e965b5ba8b5613f"],"state_sha256":"6eb12597aaa83576e1d69ce63eb8c40784a5dc299f1fbcf751628a3307af968d"}