{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2023:PKIS67UISFBP2MVBTUHNBPTJPV","short_pith_number":"pith:PKIS67UI","canonical_record":{"source":{"id":"2304.03427","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2023-04-07T00:45:12Z","cross_cats_sorted":["cs.AI","cs.CY","cs.LG"],"title_canon_sha256":"d728a5ca1fac5e1d09a29c306b7ec1e20a19803290896b437705de296bd91008","abstract_canon_sha256":"e3ccd5c1fcefd23d7f450851352a2b2bb0b604db5938d8901caadbaf323d92e0"},"schema_version":"1.0"},"canonical_sha256":"7a912f7e889142fd32a19d0ed0be697d5060e8a4c4ff821c575afec781867bb2","source":{"kind":"arxiv","id":"2304.03427","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2304.03427","created_at":"2026-05-20T00:02:01Z"},{"alias_kind":"arxiv_version","alias_value":"2304.03427v2","created_at":"2026-05-20T00:02:01Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2304.03427","created_at":"2026-05-20T00:02:01Z"},{"alias_kind":"pith_short_12","alias_value":"PKIS67UISFBP","created_at":"2026-05-20T00:02:01Z"},{"alias_kind":"pith_short_16","alias_value":"PKIS67UISFBP2MVB","created_at":"2026-05-20T00:02:01Z"},{"alias_kind":"pith_short_8","alias_value":"PKIS67UI","created_at":"2026-05-20T00:02:01Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2023:PKIS67UISFBP2MVBTUHNBPTJPV","target":"record","payload":{"canonical_record":{"source":{"id":"2304.03427","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2023-04-07T00:45:12Z","cross_cats_sorted":["cs.AI","cs.CY","cs.LG"],"title_canon_sha256":"d728a5ca1fac5e1d09a29c306b7ec1e20a19803290896b437705de296bd91008","abstract_canon_sha256":"e3ccd5c1fcefd23d7f450851352a2b2bb0b604db5938d8901caadbaf323d92e0"},"schema_version":"1.0"},"canonical_sha256":"7a912f7e889142fd32a19d0ed0be697d5060e8a4c4ff821c575afec781867bb2","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-20T00:02:01.144713Z","signature_b64":"Ll3r1rVymbgl6ZzXPGYvd+psMrmrjCxKNr8WkBQjzaGvfXRsV1HA3xFu2rGx2WTkPXdo2Rf9vqKexIijKuUXAQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"7a912f7e889142fd32a19d0ed0be697d5060e8a4c4ff821c575afec781867bb2","last_reissued_at":"2026-05-20T00:02:01.143957Z","signature_status":"signed_v1","first_computed_at":"2026-05-20T00:02:01.143957Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2304.03427","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-20T00:02:01Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"itj3qTjUMPWWR7lwm9p//ryhyKwHi+IF4Q3pFTJoOeqQlcqA+MndWHWwwzJas4fBTMWi3aAUwmJTsDQv4ze1Dw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-24T18:16:54.852756Z"},"content_sha256":"33de27b76d20ab5852bae19790baf2b95909fa410bf94244af1fcc01cd69c2ca","schema_version":"1.0","event_id":"sha256:33de27b76d20ab5852bae19790baf2b95909fa410bf94244af1fcc01cd69c2ca"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2023:PKIS67UISFBP2MVBTUHNBPTJPV","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Cleansing Jewel: A Neural Spelling Correction Model Built On Google OCR-ed Tibetan Manuscripts","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","cs.CY","cs.LG"],"primary_cat":"cs.CL","authors_text":"Queenie Luo, Yung-Sung Chuang","submitted_at":"2023-04-07T00:45:12Z","abstract_excerpt":"Scholars in the humanities rely heavily on ancient manuscripts to study history, religion, and socio-political structures in the past. Many efforts have been devoted to digitizing these precious manuscripts using OCR technology, but most manuscripts were blemished over the centuries so that an Optical Character Recognition (OCR) program cannot be expected to capture faded graphs and stains on pages. This work presents a neural spelling correction model built on Google OCR-ed Tibetan Manuscripts to auto-correct OCR-ed noisy output. This paper is divided into four sections: dataset, model archit"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2304.03427","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2304.03427/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-20T00:02:01Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"3JOd05hW0AtV/5G3EUhI1BiQ+JUjS7vQHl6HeS6QMWKaWSArf0VTr5phy8XxgVVkEAU0nL8LQTP3xdrYVQatBw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-24T18:16:54.853595Z"},"content_sha256":"5aa178f3b89ff7f80782fdc05802d48dcda4b11b3efb52a825d35f6e1b6f4138","schema_version":"1.0","event_id":"sha256:5aa178f3b89ff7f80782fdc05802d48dcda4b11b3efb52a825d35f6e1b6f4138"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/PKIS67UISFBP2MVBTUHNBPTJPV/bundle.json","state_url":"https://pith.science/pith/PKIS67UISFBP2MVBTUHNBPTJPV/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/PKIS67UISFBP2MVBTUHNBPTJPV/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-24T18:16:54Z","links":{"resolver":"https://pith.science/pith/PKIS67UISFBP2MVBTUHNBPTJPV","bundle":"https://pith.science/pith/PKIS67UISFBP2MVBTUHNBPTJPV/bundle.json","state":"https://pith.science/pith/PKIS67UISFBP2MVBTUHNBPTJPV/state.json","well_known_bundle":"https://pith.science/.well-known/pith/PKIS67UISFBP2MVBTUHNBPTJPV/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2023:PKIS67UISFBP2MVBTUHNBPTJPV","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"e3ccd5c1fcefd23d7f450851352a2b2bb0b604db5938d8901caadbaf323d92e0","cross_cats_sorted":["cs.AI","cs.CY","cs.LG"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2023-04-07T00:45:12Z","title_canon_sha256":"d728a5ca1fac5e1d09a29c306b7ec1e20a19803290896b437705de296bd91008"},"schema_version":"1.0","source":{"id":"2304.03427","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2304.03427","created_at":"2026-05-20T00:02:01Z"},{"alias_kind":"arxiv_version","alias_value":"2304.03427v2","created_at":"2026-05-20T00:02:01Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2304.03427","created_at":"2026-05-20T00:02:01Z"},{"alias_kind":"pith_short_12","alias_value":"PKIS67UISFBP","created_at":"2026-05-20T00:02:01Z"},{"alias_kind":"pith_short_16","alias_value":"PKIS67UISFBP2MVB","created_at":"2026-05-20T00:02:01Z"},{"alias_kind":"pith_short_8","alias_value":"PKIS67UI","created_at":"2026-05-20T00:02:01Z"}],"graph_snapshots":[{"event_id":"sha256:5aa178f3b89ff7f80782fdc05802d48dcda4b11b3efb52a825d35f6e1b6f4138","target":"graph","created_at":"2026-05-20T00:02:01Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2304.03427/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Scholars in the humanities rely heavily on ancient manuscripts to study history, religion, and socio-political structures in the past. Many efforts have been devoted to digitizing these precious manuscripts using OCR technology, but most manuscripts were blemished over the centuries so that an Optical Character Recognition (OCR) program cannot be expected to capture faded graphs and stains on pages. This work presents a neural spelling correction model built on Google OCR-ed Tibetan Manuscripts to auto-correct OCR-ed noisy output. This paper is divided into four sections: dataset, model archit","authors_text":"Queenie Luo, Yung-Sung Chuang","cross_cats":["cs.AI","cs.CY","cs.LG"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2023-04-07T00:45:12Z","title":"Cleansing Jewel: A Neural Spelling Correction Model Built On Google OCR-ed Tibetan Manuscripts"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2304.03427","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:33de27b76d20ab5852bae19790baf2b95909fa410bf94244af1fcc01cd69c2ca","target":"record","created_at":"2026-05-20T00:02:01Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"e3ccd5c1fcefd23d7f450851352a2b2bb0b604db5938d8901caadbaf323d92e0","cross_cats_sorted":["cs.AI","cs.CY","cs.LG"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2023-04-07T00:45:12Z","title_canon_sha256":"d728a5ca1fac5e1d09a29c306b7ec1e20a19803290896b437705de296bd91008"},"schema_version":"1.0","source":{"id":"2304.03427","kind":"arxiv","version":2}},"canonical_sha256":"7a912f7e889142fd32a19d0ed0be697d5060e8a4c4ff821c575afec781867bb2","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"7a912f7e889142fd32a19d0ed0be697d5060e8a4c4ff821c575afec781867bb2","first_computed_at":"2026-05-20T00:02:01.143957Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-20T00:02:01.143957Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"Ll3r1rVymbgl6ZzXPGYvd+psMrmrjCxKNr8WkBQjzaGvfXRsV1HA3xFu2rGx2WTkPXdo2Rf9vqKexIijKuUXAQ==","signature_status":"signed_v1","signed_at":"2026-05-20T00:02:01.144713Z","signed_message":"canonical_sha256_bytes"},"source_id":"2304.03427","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:33de27b76d20ab5852bae19790baf2b95909fa410bf94244af1fcc01cd69c2ca","sha256:5aa178f3b89ff7f80782fdc05802d48dcda4b11b3efb52a825d35f6e1b6f4138"],"state_sha256":"462b4f9dc1a49aa48bb421a5f325333bd454338ee7bf58228726fd4861391074"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"6zbP7zMpF96unxa7xMdfNWy8AKE1d6X78Pig27s8r7Hx98wco5aPOVwqlJ5ci3ECpdo4D3A5/7fivrrDASY4Dg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-24T18:16:54.859367Z","bundle_sha256":"35647751b3dfe72198756b24bce8e03ea8d8b02133f8c0f1f866c3b2796a479f"}}