{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2017:7O5QAWXWAIHEMDIAEXNEHYLADR","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"5aad386a94afbf8a1b886b6c924f1a05825a25cc9639f1b35275f72fa6ca678f","cross_cats_sorted":["cs.DL"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2017-01-19T11:31:14Z","title_canon_sha256":"bb74be3e642b8169d6f9ea2d05dec305bfff91800cc9369b59f95e0e42c668d9"},"schema_version":"1.0","source":{"id":"1701.05377","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1701.05377","created_at":"2026-05-18T00:52:30Z"},{"alias_kind":"arxiv_version","alias_value":"1701.05377v1","created_at":"2026-05-18T00:52:30Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1701.05377","created_at":"2026-05-18T00:52:30Z"},{"alias_kind":"pith_short_12","alias_value":"7O5QAWXWAIHE","created_at":"2026-05-18T12:31:05Z"},{"alias_kind":"pith_short_16","alias_value":"7O5QAWXWAIHEMDIA","created_at":"2026-05-18T12:31:05Z"},{"alias_kind":"pith_short_8","alias_value":"7O5QAWXW","created_at":"2026-05-18T12:31:05Z"}],"graph_snapshots":[{"event_id":"sha256:5c2141b6447213cd695687c7cb3465ab3ec7a3cf72f1bb297ad51c832f8c29b1","target":"graph","created_at":"2026-05-18T00:52:30Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"In the absence of ground truth it is not possible to automatically determine the exact spectrum and occurrences of OCR errors in an OCR'ed text. Yet, for interactive postcorrection of OCR'ed historical printings it is extremely useful to have a statistical profile available that provides an estimate of error classes with associated frequencies, and that points to conjectured errors and suspicious tokens. The method introduced in Reffle (2013) computes such a profile, combining lexica, pattern sets and advanced matching techniques in a specialized Expectation Maximization (EM) procedure. Here w","authors_text":"Florian Fink, Klaus-U. Schulz, Uwe Springmann","cross_cats":["cs.DL"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2017-01-19T11:31:14Z","title":"Profiling of OCR'ed Historical Texts Revisited"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1701.05377","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:9982089dbb95298bfb081eea9fef4d6fb5c25b03792dceb7031a689a567ad659","target":"record","created_at":"2026-05-18T00:52:30Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"5aad386a94afbf8a1b886b6c924f1a05825a25cc9639f1b35275f72fa6ca678f","cross_cats_sorted":["cs.DL"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2017-01-19T11:31:14Z","title_canon_sha256":"bb74be3e642b8169d6f9ea2d05dec305bfff91800cc9369b59f95e0e42c668d9"},"schema_version":"1.0","source":{"id":"1701.05377","kind":"arxiv","version":1}},"canonical_sha256":"fbbb005af6020e460d0025da43e1601c6f0c852b6704aafa55117c8aca5788ee","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"fbbb005af6020e460d0025da43e1601c6f0c852b6704aafa55117c8aca5788ee","first_computed_at":"2026-05-18T00:52:30.059180Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:52:30.059180Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"t7D+BozhwAoL5DVaHjl7l/DKIf+6VdQfDUYjxIGqQ2l3bcHXlqHOJml9OSwF++2vraNhWRyaMMRW+Rj0uqFWCg==","signature_status":"signed_v1","signed_at":"2026-05-18T00:52:30.059620Z","signed_message":"canonical_sha256_bytes"},"source_id":"1701.05377","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:9982089dbb95298bfb081eea9fef4d6fb5c25b03792dceb7031a689a567ad659","sha256:5c2141b6447213cd695687c7cb3465ab3ec7a3cf72f1bb297ad51c832f8c29b1"],"state_sha256":"8048e1c3d98defeb70863ac54b0087b5ba49b7224b983777152cb0c911dde399"}