{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2018:N6WAZZ74B67LMP3ZUY2S2YA3TZ","short_pith_number":"pith:N6WAZZ74","schema_version":"1.0","canonical_sha256":"6fac0ce7fc0fbeb63f79a6352d601b9e71cd9a586ea8a1ce434af8e2e833e7ea","source":{"kind":"arxiv","id":"1810.03430","version":1},"attestation_state":"computed","paper":{"title":"Cross Script Hindi English NER Corpus from Wikipedia","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.CL","cs.LG"],"primary_cat":"cs.IR","authors_text":"Md Arshad Ali, Mohd Zeeshan Ansari, Tanvir Ahmad","submitted_at":"2018-10-08T13:25:05Z","abstract_excerpt":"The text generated on social media platforms is essentially a mixed lingual text. The mixing of language in any form produces considerable amount of difficulty in language processing systems. Moreover, the advancements in language processing research depends upon the availability of standard corpora. The development of mixed lingual Indian Named Entity Recognition (NER) systems are facing obstacles due to unavailability of the standard evaluation corpora. Such corpora may be of mixed lingual nature in which text is written using multiple languages predominantly using a single script only. The "},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1810.03430","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.IR","submitted_at":"2018-10-08T13:25:05Z","cross_cats_sorted":["cs.CL","cs.LG"],"title_canon_sha256":"285df46dc84d3ac279124a7871cfe9d7687a4039c35f3ab94e827818eec54d77","abstract_canon_sha256":"633afa0deb47118cf4535f25a5e040126e41162eeaa3429f48dafc52600041e8"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:03:51.294313Z","signature_b64":"HwpYxTCO/O5WQ2IlHNH/Bm3vJupDy1ZnITDQPqxAkflJyXIxQqwS0zbX0Y2YZKUCtdis5u40IiMHi7xouT6vCw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"6fac0ce7fc0fbeb63f79a6352d601b9e71cd9a586ea8a1ce434af8e2e833e7ea","last_reissued_at":"2026-05-18T00:03:51.293724Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:03:51.293724Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Cross Script Hindi English NER Corpus from Wikipedia","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.CL","cs.LG"],"primary_cat":"cs.IR","authors_text":"Md Arshad Ali, Mohd Zeeshan Ansari, Tanvir Ahmad","submitted_at":"2018-10-08T13:25:05Z","abstract_excerpt":"The text generated on social media platforms is essentially a mixed lingual text. The mixing of language in any form produces considerable amount of difficulty in language processing systems. Moreover, the advancements in language processing research depends upon the availability of standard corpora. The development of mixed lingual Indian Named Entity Recognition (NER) systems are facing obstacles due to unavailability of the standard evaluation corpora. Such corpora may be of mixed lingual nature in which text is written using multiple languages predominantly using a single script only. The "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1810.03430","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1810.03430","created_at":"2026-05-18T00:03:51.293833+00:00"},{"alias_kind":"arxiv_version","alias_value":"1810.03430v1","created_at":"2026-05-18T00:03:51.293833+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1810.03430","created_at":"2026-05-18T00:03:51.293833+00:00"},{"alias_kind":"pith_short_12","alias_value":"N6WAZZ74B67L","created_at":"2026-05-18T12:32:40.477152+00:00"},{"alias_kind":"pith_short_16","alias_value":"N6WAZZ74B67LMP3Z","created_at":"2026-05-18T12:32:40.477152+00:00"},{"alias_kind":"pith_short_8","alias_value":"N6WAZZ74","created_at":"2026-05-18T12:32:40.477152+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":1,"internal_anchor_count":1,"sample":[{"citing_arxiv_id":"2510.07037","citing_title":"Beyond Monolingual Assumptions: A Survey of Code-Switched NLP in the Era of Large Language Models across Modalities","ref_index":3,"is_internal_anchor":true}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/N6WAZZ74B67LMP3ZUY2S2YA3TZ","json":"https://pith.science/pith/N6WAZZ74B67LMP3ZUY2S2YA3TZ.json","graph_json":"https://pith.science/api/pith-number/N6WAZZ74B67LMP3ZUY2S2YA3TZ/graph.json","events_json":"https://pith.science/api/pith-number/N6WAZZ74B67LMP3ZUY2S2YA3TZ/events.json","paper":"https://pith.science/paper/N6WAZZ74"},"agent_actions":{"view_html":"https://pith.science/pith/N6WAZZ74B67LMP3ZUY2S2YA3TZ","download_json":"https://pith.science/pith/N6WAZZ74B67LMP3ZUY2S2YA3TZ.json","view_paper":"https://pith.science/paper/N6WAZZ74","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1810.03430&json=true","fetch_graph":"https://pith.science/api/pith-number/N6WAZZ74B67LMP3ZUY2S2YA3TZ/graph.json","fetch_events":"https://pith.science/api/pith-number/N6WAZZ74B67LMP3ZUY2S2YA3TZ/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/N6WAZZ74B67LMP3ZUY2S2YA3TZ/action/timestamp_anchor","attest_storage":"https://pith.science/pith/N6WAZZ74B67LMP3ZUY2S2YA3TZ/action/storage_attestation","attest_author":"https://pith.science/pith/N6WAZZ74B67LMP3ZUY2S2YA3TZ/action/author_attestation","sign_citation":"https://pith.science/pith/N6WAZZ74B67LMP3ZUY2S2YA3TZ/action/citation_signature","submit_replication":"https://pith.science/pith/N6WAZZ74B67LMP3ZUY2S2YA3TZ/action/replication_record"}},"created_at":"2026-05-18T00:03:51.293833+00:00","updated_at":"2026-05-18T00:03:51.293833+00:00"}