{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2018:Y7CAYWR4EOZ23WTNMEC5CNQCNK","short_pith_number":"pith:Y7CAYWR4","canonical_record":{"source":{"id":"1803.09641","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2018-03-26T15:01:52Z","cross_cats_sorted":[],"title_canon_sha256":"d6230129c9ba722a6a9bc12a903754570680ce1bdea69caa04a4886dbaf13771","abstract_canon_sha256":"c11940f4e79354157f05c7b1884c09aaf89e3d33878602010370b56cb137bbf5"},"schema_version":"1.0"},"canonical_sha256":"c7c40c5a3c23b3adda6d6105d136026a99e52157d44ec6ea3bc3f831fb2e7e2a","source":{"kind":"arxiv","id":"1803.09641","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1803.09641","created_at":"2026-05-18T00:20:10Z"},{"alias_kind":"arxiv_version","alias_value":"1803.09641v1","created_at":"2026-05-18T00:20:10Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1803.09641","created_at":"2026-05-18T00:20:10Z"},{"alias_kind":"pith_short_12","alias_value":"Y7CAYWR4EOZ2","created_at":"2026-05-18T12:33:04Z"},{"alias_kind":"pith_short_16","alias_value":"Y7CAYWR4EOZ23WTN","created_at":"2026-05-18T12:33:04Z"},{"alias_kind":"pith_short_8","alias_value":"Y7CAYWR4","created_at":"2026-05-18T12:33:04Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2018:Y7CAYWR4EOZ23WTNMEC5CNQCNK","target":"record","payload":{"canonical_record":{"source":{"id":"1803.09641","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2018-03-26T15:01:52Z","cross_cats_sorted":[],"title_canon_sha256":"d6230129c9ba722a6a9bc12a903754570680ce1bdea69caa04a4886dbaf13771","abstract_canon_sha256":"c11940f4e79354157f05c7b1884c09aaf89e3d33878602010370b56cb137bbf5"},"schema_version":"1.0"},"canonical_sha256":"c7c40c5a3c23b3adda6d6105d136026a99e52157d44ec6ea3bc3f831fb2e7e2a","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:20:10.840039Z","signature_b64":"Wt3DtSa3iWZxfNPpFbHkZCnKfEpIBGTzxWt9Xi7gLyCzDvfR49AtOUzS1cM51BCxPaqegEPT4cl3TqDKmVJOAA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"c7c40c5a3c23b3adda6d6105d136026a99e52157d44ec6ea3bc3f831fb2e7e2a","last_reissued_at":"2026-05-18T00:20:10.839277Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:20:10.839277Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1803.09641","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:20:10Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"cPwq5MTRwGfV6omJ8fAGD99CZHONoDguj27g0pFMjZsuNJ2JEBH/dwvtkpSJL4918ZsClNMFpD1MtcRA5RKgBw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-07T20:10:58.060569Z"},"content_sha256":"d2c3bc144f2bd142d42a26a8b7ee37a12fc90b4878051120f54c5965fc957904","schema_version":"1.0","event_id":"sha256:d2c3bc144f2bd142d42a26a8b7ee37a12fc90b4878051120f54c5965fc957904"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2018:Y7CAYWR4EOZ23WTNMEC5CNQCNK","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Unsupervised Separation of Transliterable and Native Words for Malayalam","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Deepak P","submitted_at":"2018-03-26T15:01:52Z","abstract_excerpt":"Differentiating intrinsic language words from transliterable words is a key step aiding text processing tasks involving different natural languages. We consider the problem of unsupervised separation of transliterable words from native words for text in Malayalam language. Outlining a key observation on the diversity of characters beyond the word stem, we develop an optimization method to score words based on their nativeness. Our method relies on the usage of probability distributions over character n-grams that are refined in step with the nativeness scorings in an iterative optimization for"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1803.09641","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:20:10Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"yseJ0ZimIANZtdk2VwhIK0RsJFLubsXI5PJUR0DIqRWLlQOU/1lRYSg54R8pvVkCZu2Vc+3MwnS2/P4eYOtUBA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-07T20:10:58.061209Z"},"content_sha256":"82e2a5e249ec626b38f59b3caea0d63629dd336ffce5c7d99ac85ea187e6ab18","schema_version":"1.0","event_id":"sha256:82e2a5e249ec626b38f59b3caea0d63629dd336ffce5c7d99ac85ea187e6ab18"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/Y7CAYWR4EOZ23WTNMEC5CNQCNK/bundle.json","state_url":"https://pith.science/pith/Y7CAYWR4EOZ23WTNMEC5CNQCNK/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/Y7CAYWR4EOZ23WTNMEC5CNQCNK/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-07T20:10:58Z","links":{"resolver":"https://pith.science/pith/Y7CAYWR4EOZ23WTNMEC5CNQCNK","bundle":"https://pith.science/pith/Y7CAYWR4EOZ23WTNMEC5CNQCNK/bundle.json","state":"https://pith.science/pith/Y7CAYWR4EOZ23WTNMEC5CNQCNK/state.json","well_known_bundle":"https://pith.science/.well-known/pith/Y7CAYWR4EOZ23WTNMEC5CNQCNK/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2018:Y7CAYWR4EOZ23WTNMEC5CNQCNK","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"c11940f4e79354157f05c7b1884c09aaf89e3d33878602010370b56cb137bbf5","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2018-03-26T15:01:52Z","title_canon_sha256":"d6230129c9ba722a6a9bc12a903754570680ce1bdea69caa04a4886dbaf13771"},"schema_version":"1.0","source":{"id":"1803.09641","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1803.09641","created_at":"2026-05-18T00:20:10Z"},{"alias_kind":"arxiv_version","alias_value":"1803.09641v1","created_at":"2026-05-18T00:20:10Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1803.09641","created_at":"2026-05-18T00:20:10Z"},{"alias_kind":"pith_short_12","alias_value":"Y7CAYWR4EOZ2","created_at":"2026-05-18T12:33:04Z"},{"alias_kind":"pith_short_16","alias_value":"Y7CAYWR4EOZ23WTN","created_at":"2026-05-18T12:33:04Z"},{"alias_kind":"pith_short_8","alias_value":"Y7CAYWR4","created_at":"2026-05-18T12:33:04Z"}],"graph_snapshots":[{"event_id":"sha256:82e2a5e249ec626b38f59b3caea0d63629dd336ffce5c7d99ac85ea187e6ab18","target":"graph","created_at":"2026-05-18T00:20:10Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Differentiating intrinsic language words from transliterable words is a key step aiding text processing tasks involving different natural languages. We consider the problem of unsupervised separation of transliterable words from native words for text in Malayalam language. Outlining a key observation on the diversity of characters beyond the word stem, we develop an optimization method to score words based on their nativeness. Our method relies on the usage of probability distributions over character n-grams that are refined in step with the nativeness scorings in an iterative optimization for","authors_text":"Deepak P","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2018-03-26T15:01:52Z","title":"Unsupervised Separation of Transliterable and Native Words for Malayalam"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1803.09641","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:d2c3bc144f2bd142d42a26a8b7ee37a12fc90b4878051120f54c5965fc957904","target":"record","created_at":"2026-05-18T00:20:10Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"c11940f4e79354157f05c7b1884c09aaf89e3d33878602010370b56cb137bbf5","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2018-03-26T15:01:52Z","title_canon_sha256":"d6230129c9ba722a6a9bc12a903754570680ce1bdea69caa04a4886dbaf13771"},"schema_version":"1.0","source":{"id":"1803.09641","kind":"arxiv","version":1}},"canonical_sha256":"c7c40c5a3c23b3adda6d6105d136026a99e52157d44ec6ea3bc3f831fb2e7e2a","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"c7c40c5a3c23b3adda6d6105d136026a99e52157d44ec6ea3bc3f831fb2e7e2a","first_computed_at":"2026-05-18T00:20:10.839277Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:20:10.839277Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"Wt3DtSa3iWZxfNPpFbHkZCnKfEpIBGTzxWt9Xi7gLyCzDvfR49AtOUzS1cM51BCxPaqegEPT4cl3TqDKmVJOAA==","signature_status":"signed_v1","signed_at":"2026-05-18T00:20:10.840039Z","signed_message":"canonical_sha256_bytes"},"source_id":"1803.09641","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:d2c3bc144f2bd142d42a26a8b7ee37a12fc90b4878051120f54c5965fc957904","sha256:82e2a5e249ec626b38f59b3caea0d63629dd336ffce5c7d99ac85ea187e6ab18"],"state_sha256":"f829ce4c5690a4584ddca32d2313ad35c8ef630ecec7024bd210facface3a3ea"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"VQRF4hr2q2v4px5MfnPtiFz3plejLu/7QzjGu9P/X0FAsseNGeMwnSkFf/89hp2K7CHgEDYcqnElEgCRpt47Dg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-07T20:10:58.064696Z","bundle_sha256":"0827c813c0841edee2418840b99e1d59172956f772b9e924eb689ff60d141a55"}}