{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2018:R4DQYERZI5ZC64DQCRN3VIYUYP","short_pith_number":"pith:R4DQYERZ","canonical_record":{"source":{"id":"1806.05432","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2018-06-14T09:39:32Z","cross_cats_sorted":[],"title_canon_sha256":"f3c9451e1cc9d9c5e01751216eaeed8811f8caba7d9645591f717431ddb389ed","abstract_canon_sha256":"97ccd8763f1d5fc471de9a08200ec9d1bf1f0fdf7b34397de8a9c11c227932c8"},"schema_version":"1.0"},"canonical_sha256":"8f070c123947722f7070145bbaa314c3f648ee4ee701e364175b86c8c48852e8","source":{"kind":"arxiv","id":"1806.05432","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1806.05432","created_at":"2026-05-18T00:13:15Z"},{"alias_kind":"arxiv_version","alias_value":"1806.05432v1","created_at":"2026-05-18T00:13:15Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1806.05432","created_at":"2026-05-18T00:13:15Z"},{"alias_kind":"pith_short_12","alias_value":"R4DQYERZI5ZC","created_at":"2026-05-18T12:32:50Z"},{"alias_kind":"pith_short_16","alias_value":"R4DQYERZI5ZC64DQ","created_at":"2026-05-18T12:32:50Z"},{"alias_kind":"pith_short_8","alias_value":"R4DQYERZ","created_at":"2026-05-18T12:32:50Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2018:R4DQYERZI5ZC64DQCRN3VIYUYP","target":"record","payload":{"canonical_record":{"source":{"id":"1806.05432","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2018-06-14T09:39:32Z","cross_cats_sorted":[],"title_canon_sha256":"f3c9451e1cc9d9c5e01751216eaeed8811f8caba7d9645591f717431ddb389ed","abstract_canon_sha256":"97ccd8763f1d5fc471de9a08200ec9d1bf1f0fdf7b34397de8a9c11c227932c8"},"schema_version":"1.0"},"canonical_sha256":"8f070c123947722f7070145bbaa314c3f648ee4ee701e364175b86c8c48852e8","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:13:15.967547Z","signature_b64":"2gdQrzio3Aa6MD06p+O5wRuYSC47/ZIiYLpAtS8Pccy2yfCMQF8Uy7YZHj/XRGawiaD03iCNpIwj6kpGgIPzBg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"8f070c123947722f7070145bbaa314c3f648ee4ee701e364175b86c8c48852e8","last_reissued_at":"2026-05-18T00:13:15.966918Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:13:15.966918Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1806.05432","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:13:15Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"uINmE3ZyclhCfTLWTFBCrEWM9P+Pu9LsP4MSl78Eyt3kgpSyti9wyI8ESbXQ8KPLo/SXJcN+7sB54FhT2seLBg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-12T07:33:48.664789Z"},"content_sha256":"7de8a345d0c4b6c4ba5cf2f2670731e0a840f4b25fd10f1b1fac0de71c698f96","schema_version":"1.0","event_id":"sha256:7de8a345d0c4b6c4ba5cf2f2670731e0a840f4b25fd10f1b1fac0de71c698f96"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2018:R4DQYERZI5ZC64DQCRN3VIYUYP","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Urdu Word Segmentation using Conditional Random Fields (CRFs)","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Agha Ali Raza, Awais Athar, Haris Bin Zia","submitted_at":"2018-06-14T09:39:32Z","abstract_excerpt":"State-of-the-art Natural Language Processing algorithms rely heavily on efficient word segmentation. Urdu is amongst languages for which word segmentation is a complex task as it exhibits space omission as well as space insertion issues. This is partly due to the Arabic script which although cursive in nature, consists of characters that have inherent joining and non-joining attributes regardless of word boundary. This paper presents a word segmentation system for Urdu which uses a Conditional Random Field sequence modeler with orthographic, linguistic and morphological features. Our proposed "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1806.05432","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:13:15Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"KgQiiF8Qscwk2CsVrMTmUeeAAtIDOCxyk3bEOmQjgujvVHpZSpGfeEiEncMjW+/MynHwAEyKZCXHbUP3ZN1FAg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-12T07:33:48.665349Z"},"content_sha256":"0aaf2afcabef70d69a5363ffecdc3e2dc31bc3c7f716bcb6911573bdb9f3371c","schema_version":"1.0","event_id":"sha256:0aaf2afcabef70d69a5363ffecdc3e2dc31bc3c7f716bcb6911573bdb9f3371c"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/R4DQYERZI5ZC64DQCRN3VIYUYP/bundle.json","state_url":"https://pith.science/pith/R4DQYERZI5ZC64DQCRN3VIYUYP/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/R4DQYERZI5ZC64DQCRN3VIYUYP/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-12T07:33:48Z","links":{"resolver":"https://pith.science/pith/R4DQYERZI5ZC64DQCRN3VIYUYP","bundle":"https://pith.science/pith/R4DQYERZI5ZC64DQCRN3VIYUYP/bundle.json","state":"https://pith.science/pith/R4DQYERZI5ZC64DQCRN3VIYUYP/state.json","well_known_bundle":"https://pith.science/.well-known/pith/R4DQYERZI5ZC64DQCRN3VIYUYP/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2018:R4DQYERZI5ZC64DQCRN3VIYUYP","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"97ccd8763f1d5fc471de9a08200ec9d1bf1f0fdf7b34397de8a9c11c227932c8","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2018-06-14T09:39:32Z","title_canon_sha256":"f3c9451e1cc9d9c5e01751216eaeed8811f8caba7d9645591f717431ddb389ed"},"schema_version":"1.0","source":{"id":"1806.05432","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1806.05432","created_at":"2026-05-18T00:13:15Z"},{"alias_kind":"arxiv_version","alias_value":"1806.05432v1","created_at":"2026-05-18T00:13:15Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1806.05432","created_at":"2026-05-18T00:13:15Z"},{"alias_kind":"pith_short_12","alias_value":"R4DQYERZI5ZC","created_at":"2026-05-18T12:32:50Z"},{"alias_kind":"pith_short_16","alias_value":"R4DQYERZI5ZC64DQ","created_at":"2026-05-18T12:32:50Z"},{"alias_kind":"pith_short_8","alias_value":"R4DQYERZ","created_at":"2026-05-18T12:32:50Z"}],"graph_snapshots":[{"event_id":"sha256:0aaf2afcabef70d69a5363ffecdc3e2dc31bc3c7f716bcb6911573bdb9f3371c","target":"graph","created_at":"2026-05-18T00:13:15Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"State-of-the-art Natural Language Processing algorithms rely heavily on efficient word segmentation. Urdu is amongst languages for which word segmentation is a complex task as it exhibits space omission as well as space insertion issues. This is partly due to the Arabic script which although cursive in nature, consists of characters that have inherent joining and non-joining attributes regardless of word boundary. This paper presents a word segmentation system for Urdu which uses a Conditional Random Field sequence modeler with orthographic, linguistic and morphological features. Our proposed ","authors_text":"Agha Ali Raza, Awais Athar, Haris Bin Zia","cross_cats":[],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2018-06-14T09:39:32Z","title":"Urdu Word Segmentation using Conditional Random Fields (CRFs)"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1806.05432","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:7de8a345d0c4b6c4ba5cf2f2670731e0a840f4b25fd10f1b1fac0de71c698f96","target":"record","created_at":"2026-05-18T00:13:15Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"97ccd8763f1d5fc471de9a08200ec9d1bf1f0fdf7b34397de8a9c11c227932c8","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2018-06-14T09:39:32Z","title_canon_sha256":"f3c9451e1cc9d9c5e01751216eaeed8811f8caba7d9645591f717431ddb389ed"},"schema_version":"1.0","source":{"id":"1806.05432","kind":"arxiv","version":1}},"canonical_sha256":"8f070c123947722f7070145bbaa314c3f648ee4ee701e364175b86c8c48852e8","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"8f070c123947722f7070145bbaa314c3f648ee4ee701e364175b86c8c48852e8","first_computed_at":"2026-05-18T00:13:15.966918Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:13:15.966918Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"2gdQrzio3Aa6MD06p+O5wRuYSC47/ZIiYLpAtS8Pccy2yfCMQF8Uy7YZHj/XRGawiaD03iCNpIwj6kpGgIPzBg==","signature_status":"signed_v1","signed_at":"2026-05-18T00:13:15.967547Z","signed_message":"canonical_sha256_bytes"},"source_id":"1806.05432","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:7de8a345d0c4b6c4ba5cf2f2670731e0a840f4b25fd10f1b1fac0de71c698f96","sha256:0aaf2afcabef70d69a5363ffecdc3e2dc31bc3c7f716bcb6911573bdb9f3371c"],"state_sha256":"75b8306ef570633d0f17fe00c16f5b4f7ed082b696d428aaad0cf12988c027a2"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"qZKC+vC9GGqKHuRT6shvBvNOb1Kdq8GrBpywbGQbBudTW7kzO9SALhUyvsq7qj616FRhVPQFrybnlJdso3z0Dw==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-12T07:33:48.668195Z","bundle_sha256":"8032eac8e694e3206dd02553dc77381f82e48f62ca1a1c49ddd76505e5b7ab63"}}