{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2019:GUEAOC3NBT2H3SBD7A6FNFJBRB","short_pith_number":"pith:GUEAOC3N","canonical_record":{"source":{"id":"1905.11739","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2019-05-28T11:11:57Z","cross_cats_sorted":[],"title_canon_sha256":"64a2206d55d67068a9dba89dc9aa865d4d4678411812a620c0e61410520bd391","abstract_canon_sha256":"7dcd8876fb313f3b4f605893104482234c03f6db2e072699c8c809b32417b27a"},"schema_version":"1.0"},"canonical_sha256":"3508070b6d0cf47dc823f83c569521886cf4c0b97a093e22a174a8e6e6463a23","source":{"kind":"arxiv","id":"1905.11739","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1905.11739","created_at":"2026-05-17T23:44:52Z"},{"alias_kind":"arxiv_version","alias_value":"1905.11739v1","created_at":"2026-05-17T23:44:52Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1905.11739","created_at":"2026-05-17T23:44:52Z"},{"alias_kind":"pith_short_12","alias_value":"GUEAOC3NBT2H","created_at":"2026-05-18T12:33:18Z"},{"alias_kind":"pith_short_16","alias_value":"GUEAOC3NBT2H3SBD","created_at":"2026-05-18T12:33:18Z"},{"alias_kind":"pith_short_8","alias_value":"GUEAOC3N","created_at":"2026-05-18T12:33:18Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2019:GUEAOC3NBT2H3SBD7A6FNFJBRB","target":"record","payload":{"canonical_record":{"source":{"id":"1905.11739","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2019-05-28T11:11:57Z","cross_cats_sorted":[],"title_canon_sha256":"64a2206d55d67068a9dba89dc9aa865d4d4678411812a620c0e61410520bd391","abstract_canon_sha256":"7dcd8876fb313f3b4f605893104482234c03f6db2e072699c8c809b32417b27a"},"schema_version":"1.0"},"canonical_sha256":"3508070b6d0cf47dc823f83c569521886cf4c0b97a093e22a174a8e6e6463a23","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:44:52.447746Z","signature_b64":"qr3pHFkfw1Kk5VNWY33octVv+hY2xOGFZbWBDfHK1W1dZEx25REWRwNf27/bnnNbmC6kLReXXgPBk2eKJxfkAg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"3508070b6d0cf47dc823f83c569521886cf4c0b97a093e22a174a8e6e6463a23","last_reissued_at":"2026-05-17T23:44:52.447084Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:44:52.447084Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1905.11739","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:44:52Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"fV3CH8oUuhmFK8TJMOo6KW7w4hUFHaOpG6J83Jzk7blaKfyfFhyT/FXaGBLh9yvBAyUS2F3GccyaSmXU0k/+AQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-30T21:01:35.195903Z"},"content_sha256":"89a56f35103e57a404c09da1ea3cb4d46025263e3c44a77009030aee732b150f","schema_version":"1.0","event_id":"sha256:89a56f35103e57a404c09da1ea3cb4d46025263e3c44a77009030aee732b150f"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2019:GUEAOC3NBT2H3SBD7A6FNFJBRB","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"A Cost Efficient Approach to Correct OCR Errors in Large Document Collections","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CV","authors_text":"C. V. Jawahar, Deepayan Das, Jerin Philip, Minesh Mathew","submitted_at":"2019-05-28T11:11:57Z","abstract_excerpt":"Word error rate of an ocr is often higher than its character error rate. This is especially true when ocrs are designed by recognizing characters. High word accuracies are critical to tasks like the creation of content in digital libraries and text-to-speech applications. In order to detect and correct the misrecognised words, it is common for an ocr module to employ a post-processor to further improve the word accuracy. However, conventional approaches to post-processing like looking up a dictionary or using a statistical language model (slm), are still limited. In many such scenarios, it is "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1905.11739","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:44:52Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"tu0LP+8Sikuno64hMGL1WUk3CbfWMYjjnyVMuHFHkq5l+FVVb1+pcgaJu3trs919wdxv9XBDjcImyq6YeOjuCQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-30T21:01:35.196399Z"},"content_sha256":"e9a0c4335519551acd2ec72fe520ed67ad8881674d63994d944e85c922e341db","schema_version":"1.0","event_id":"sha256:e9a0c4335519551acd2ec72fe520ed67ad8881674d63994d944e85c922e341db"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/GUEAOC3NBT2H3SBD7A6FNFJBRB/bundle.json","state_url":"https://pith.science/pith/GUEAOC3NBT2H3SBD7A6FNFJBRB/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/GUEAOC3NBT2H3SBD7A6FNFJBRB/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-30T21:01:35Z","links":{"resolver":"https://pith.science/pith/GUEAOC3NBT2H3SBD7A6FNFJBRB","bundle":"https://pith.science/pith/GUEAOC3NBT2H3SBD7A6FNFJBRB/bundle.json","state":"https://pith.science/pith/GUEAOC3NBT2H3SBD7A6FNFJBRB/state.json","well_known_bundle":"https://pith.science/.well-known/pith/GUEAOC3NBT2H3SBD7A6FNFJBRB/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2019:GUEAOC3NBT2H3SBD7A6FNFJBRB","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"7dcd8876fb313f3b4f605893104482234c03f6db2e072699c8c809b32417b27a","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2019-05-28T11:11:57Z","title_canon_sha256":"64a2206d55d67068a9dba89dc9aa865d4d4678411812a620c0e61410520bd391"},"schema_version":"1.0","source":{"id":"1905.11739","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1905.11739","created_at":"2026-05-17T23:44:52Z"},{"alias_kind":"arxiv_version","alias_value":"1905.11739v1","created_at":"2026-05-17T23:44:52Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1905.11739","created_at":"2026-05-17T23:44:52Z"},{"alias_kind":"pith_short_12","alias_value":"GUEAOC3NBT2H","created_at":"2026-05-18T12:33:18Z"},{"alias_kind":"pith_short_16","alias_value":"GUEAOC3NBT2H3SBD","created_at":"2026-05-18T12:33:18Z"},{"alias_kind":"pith_short_8","alias_value":"GUEAOC3N","created_at":"2026-05-18T12:33:18Z"}],"graph_snapshots":[{"event_id":"sha256:e9a0c4335519551acd2ec72fe520ed67ad8881674d63994d944e85c922e341db","target":"graph","created_at":"2026-05-17T23:44:52Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Word error rate of an ocr is often higher than its character error rate. This is especially true when ocrs are designed by recognizing characters. High word accuracies are critical to tasks like the creation of content in digital libraries and text-to-speech applications. In order to detect and correct the misrecognised words, it is common for an ocr module to employ a post-processor to further improve the word accuracy. However, conventional approaches to post-processing like looking up a dictionary or using a statistical language model (slm), are still limited. In many such scenarios, it is ","authors_text":"C. V. Jawahar, Deepayan Das, Jerin Philip, Minesh Mathew","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2019-05-28T11:11:57Z","title":"A Cost Efficient Approach to Correct OCR Errors in Large Document Collections"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1905.11739","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:89a56f35103e57a404c09da1ea3cb4d46025263e3c44a77009030aee732b150f","target":"record","created_at":"2026-05-17T23:44:52Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"7dcd8876fb313f3b4f605893104482234c03f6db2e072699c8c809b32417b27a","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2019-05-28T11:11:57Z","title_canon_sha256":"64a2206d55d67068a9dba89dc9aa865d4d4678411812a620c0e61410520bd391"},"schema_version":"1.0","source":{"id":"1905.11739","kind":"arxiv","version":1}},"canonical_sha256":"3508070b6d0cf47dc823f83c569521886cf4c0b97a093e22a174a8e6e6463a23","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"3508070b6d0cf47dc823f83c569521886cf4c0b97a093e22a174a8e6e6463a23","first_computed_at":"2026-05-17T23:44:52.447084Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:44:52.447084Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"qr3pHFkfw1Kk5VNWY33octVv+hY2xOGFZbWBDfHK1W1dZEx25REWRwNf27/bnnNbmC6kLReXXgPBk2eKJxfkAg==","signature_status":"signed_v1","signed_at":"2026-05-17T23:44:52.447746Z","signed_message":"canonical_sha256_bytes"},"source_id":"1905.11739","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:89a56f35103e57a404c09da1ea3cb4d46025263e3c44a77009030aee732b150f","sha256:e9a0c4335519551acd2ec72fe520ed67ad8881674d63994d944e85c922e341db"],"state_sha256":"2db72000cd5cf60c43d6c60486959d4de0d445983fc515e829bfaf148f323ae6"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"5KPx5LsH0BfqhaEfiJDMgJ6ysF/qLBLWVfvQgefIG71Wl/7R/DUJVs21S5P75PJuPqkEqaN4M+Ruaw2hIsuYBQ==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-30T21:01:35.198694Z","bundle_sha256":"d445b19a70fb13c41ca4abf94c1b34bb14554d449e94d4c914ca318e80fbd1bf"}}