{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2021:A4F5SCDBFA4V7HWWXQSKPC2FOZ","short_pith_number":"pith:A4F5SCDB","canonical_record":{"source":{"id":"2103.11811","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2021-03-22T13:12:44Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"1643213e7cccdcb4abdab21e04df098cb3613c0a900c1bb17dc0c6470e8d8e96","abstract_canon_sha256":"1fb4972441220e82e7ca1ee52ff6771917474d847a110aa46b0db47d1ac71992"},"schema_version":"1.0"},"canonical_sha256":"070bd9086128395f9ed6bc24a78b457650a299e590a864b490321d616ad5df88","source":{"kind":"arxiv","id":"2103.11811","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2103.11811","created_at":"2026-07-05T02:54:55Z"},{"alias_kind":"arxiv_version","alias_value":"2103.11811v2","created_at":"2026-07-05T02:54:55Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2103.11811","created_at":"2026-07-05T02:54:55Z"},{"alias_kind":"pith_short_12","alias_value":"A4F5SCDBFA4V","created_at":"2026-07-05T02:54:55Z"},{"alias_kind":"pith_short_16","alias_value":"A4F5SCDBFA4V7HWW","created_at":"2026-07-05T02:54:55Z"},{"alias_kind":"pith_short_8","alias_value":"A4F5SCDB","created_at":"2026-07-05T02:54:55Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2021:A4F5SCDBFA4V7HWWXQSKPC2FOZ","target":"record","payload":{"canonical_record":{"source":{"id":"2103.11811","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2021-03-22T13:12:44Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"1643213e7cccdcb4abdab21e04df098cb3613c0a900c1bb17dc0c6470e8d8e96","abstract_canon_sha256":"1fb4972441220e82e7ca1ee52ff6771917474d847a110aa46b0db47d1ac71992"},"schema_version":"1.0"},"canonical_sha256":"070bd9086128395f9ed6bc24a78b457650a299e590a864b490321d616ad5df88","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-07-05T02:54:55.286864Z","signature_b64":"N5JiVsAgaqiu/0tpq04zxsxrHruRnBNaX/enjYQnHOrOXGyae96G7+maKywCe7gnq9oT9kEywXsSsqhdZNWzCA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"070bd9086128395f9ed6bc24a78b457650a299e590a864b490321d616ad5df88","last_reissued_at":"2026-07-05T02:54:55.286452Z","signature_status":"signed_v1","first_computed_at":"2026-07-05T02:54:55.286452Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2103.11811","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-07-05T02:54:55Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"0zyemVNtw3Qw99fQ8ZhfALWnIQTiioBHR2lomHgX2NuYCCYgo+xjb/6T0/UTBurKJ1gEb90vEN7UxcYTjJrVDg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-07-05T15:38:15.890556Z"},"content_sha256":"5f449665da67bc36eea6c4af8c195c88c5551502ef66bb12b618831cdb69bd65","schema_version":"1.0","event_id":"sha256:5f449665da67bc36eea6c4af8c195c88c5551502ef66bb12b618831cdb69bd65"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2021:A4F5SCDBFA4V7HWWXQSKPC2FOZ","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"MasakhaNER: Named Entity Recognition for African Languages","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.CL","authors_text":"Abdoulaye Diallo, Abdoulaye Faye, Adewale Akinfaderin, Anuoluwapo Aremu, Ayodele Awokoya, Blessing Sibanda, Bonaventure F. P. Dossou, Catherine Gitau, Chester Palen-Michel, Chiamaka Chukwuneke, Chris Chinenye Emezue, Clemencia Siro, Constantine Lignos, Daniel D'souza, David Ifeoluwa Adelani, Davis David, Deborah Nabagereka, Degaga Wolde, Derguene Mbaye, Dibora Gebreyohannes, Emmanuel Anebi, Eric Peter Wairagala, Gerald Muriuki, Graham Neubig, Happy Buzaaba, Henok Tilaye, Ignatius Ezeani, Iroro Orife, Israel Abebe Azime, Jade Abbott, Jesujoba Alabi, Jonathan Mukiibi, Joyce Nakatumba-Nabende, Julia Kreutzer, Kelechi Nwaike, Kelechi Ogueji, Maurice Katusiime, Mofetoluwa Adeyemi, Mouhamadane Mboup, Nkiruka Odu, Orevaoghene Ahia, Paul Rayson, Perez Ogayo, Rubungo Andre Niyongabo, Salomey Osei, Samba Ngom, Samuel Oyerinde, Sebastian Ruder, Seid Muhie Yimam, Shamsuddeen Muhammad, Shruti Rijhwani, Stephen Mayhew, Tajuddeen Gwadabe, Temilola Oloyede, Tendai Marengereke, Thierno Ibrahima DIOP, Tobius Saul Bateesa, Tosin Adewumi, Verrah Otiende, Victor Akinode, Yvonne Wambui","submitted_at":"2021-03-22T13:12:44Z","abstract_excerpt":"We take a step towards addressing the under-representation of the African continent in NLP research by creating the first large publicly available high-quality dataset for named entity recognition (NER) in ten African languages, bringing together a variety of stakeholders. We detail characteristics of the languages to help researchers understand the challenges that these languages pose for NER. We analyze our datasets and conduct an extensive empirical evaluation of state-of-the-art methods across both supervised and transfer learning settings. We release the data, code, and models in order to"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2103.11811","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2103.11811/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-07-05T02:54:55Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"Rk/ZvuxiOWnmJBXhA0vSxqATVHV1EYTdocaI6U1N22GihpncHAohuLlou0yVuMSeAHVsCUObBOtl27n+3HTcDw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-07-05T15:38:15.891148Z"},"content_sha256":"b6bdd951a9a5aba2da82d018b0b3a46ea801da5bb6b13b165eef45d5141b65cd","schema_version":"1.0","event_id":"sha256:b6bdd951a9a5aba2da82d018b0b3a46ea801da5bb6b13b165eef45d5141b65cd"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/A4F5SCDBFA4V7HWWXQSKPC2FOZ/bundle.json","state_url":"https://pith.science/pith/A4F5SCDBFA4V7HWWXQSKPC2FOZ/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/A4F5SCDBFA4V7HWWXQSKPC2FOZ/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-07-05T15:38:15Z","links":{"resolver":"https://pith.science/pith/A4F5SCDBFA4V7HWWXQSKPC2FOZ","bundle":"https://pith.science/pith/A4F5SCDBFA4V7HWWXQSKPC2FOZ/bundle.json","state":"https://pith.science/pith/A4F5SCDBFA4V7HWWXQSKPC2FOZ/state.json","well_known_bundle":"https://pith.science/.well-known/pith/A4F5SCDBFA4V7HWWXQSKPC2FOZ/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2021:A4F5SCDBFA4V7HWWXQSKPC2FOZ","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"1fb4972441220e82e7ca1ee52ff6771917474d847a110aa46b0db47d1ac71992","cross_cats_sorted":["cs.AI"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2021-03-22T13:12:44Z","title_canon_sha256":"1643213e7cccdcb4abdab21e04df098cb3613c0a900c1bb17dc0c6470e8d8e96"},"schema_version":"1.0","source":{"id":"2103.11811","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2103.11811","created_at":"2026-07-05T02:54:55Z"},{"alias_kind":"arxiv_version","alias_value":"2103.11811v2","created_at":"2026-07-05T02:54:55Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2103.11811","created_at":"2026-07-05T02:54:55Z"},{"alias_kind":"pith_short_12","alias_value":"A4F5SCDBFA4V","created_at":"2026-07-05T02:54:55Z"},{"alias_kind":"pith_short_16","alias_value":"A4F5SCDBFA4V7HWW","created_at":"2026-07-05T02:54:55Z"},{"alias_kind":"pith_short_8","alias_value":"A4F5SCDB","created_at":"2026-07-05T02:54:55Z"}],"graph_snapshots":[{"event_id":"sha256:b6bdd951a9a5aba2da82d018b0b3a46ea801da5bb6b13b165eef45d5141b65cd","target":"graph","created_at":"2026-07-05T02:54:55Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2103.11811/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"We take a step towards addressing the under-representation of the African continent in NLP research by creating the first large publicly available high-quality dataset for named entity recognition (NER) in ten African languages, bringing together a variety of stakeholders. We detail characteristics of the languages to help researchers understand the challenges that these languages pose for NER. We analyze our datasets and conduct an extensive empirical evaluation of state-of-the-art methods across both supervised and transfer learning settings. We release the data, code, and models in order to","authors_text":"Abdoulaye Diallo, Abdoulaye Faye, Adewale Akinfaderin, Anuoluwapo Aremu, Ayodele Awokoya, Blessing Sibanda, Bonaventure F. P. Dossou, Catherine Gitau, Chester Palen-Michel, Chiamaka Chukwuneke, Chris Chinenye Emezue, Clemencia Siro, Constantine Lignos, Daniel D'souza, David Ifeoluwa Adelani, Davis David, Deborah Nabagereka, Degaga Wolde, Derguene Mbaye, Dibora Gebreyohannes, Emmanuel Anebi, Eric Peter Wairagala, Gerald Muriuki, Graham Neubig, Happy Buzaaba, Henok Tilaye, Ignatius Ezeani, Iroro Orife, Israel Abebe Azime, Jade Abbott, Jesujoba Alabi, Jonathan Mukiibi, Joyce Nakatumba-Nabende, Julia Kreutzer, Kelechi Nwaike, Kelechi Ogueji, Maurice Katusiime, Mofetoluwa Adeyemi, Mouhamadane Mboup, Nkiruka Odu, Orevaoghene Ahia, Paul Rayson, Perez Ogayo, Rubungo Andre Niyongabo, Salomey Osei, Samba Ngom, Samuel Oyerinde, Sebastian Ruder, Seid Muhie Yimam, Shamsuddeen Muhammad, Shruti Rijhwani, Stephen Mayhew, Tajuddeen Gwadabe, Temilola Oloyede, Tendai Marengereke, Thierno Ibrahima DIOP, Tobius Saul Bateesa, Tosin Adewumi, Verrah Otiende, Victor Akinode, Yvonne Wambui","cross_cats":["cs.AI"],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2021-03-22T13:12:44Z","title":"MasakhaNER: Named Entity Recognition for African Languages"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2103.11811","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:5f449665da67bc36eea6c4af8c195c88c5551502ef66bb12b618831cdb69bd65","target":"record","created_at":"2026-07-05T02:54:55Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"1fb4972441220e82e7ca1ee52ff6771917474d847a110aa46b0db47d1ac71992","cross_cats_sorted":["cs.AI"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2021-03-22T13:12:44Z","title_canon_sha256":"1643213e7cccdcb4abdab21e04df098cb3613c0a900c1bb17dc0c6470e8d8e96"},"schema_version":"1.0","source":{"id":"2103.11811","kind":"arxiv","version":2}},"canonical_sha256":"070bd9086128395f9ed6bc24a78b457650a299e590a864b490321d616ad5df88","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"070bd9086128395f9ed6bc24a78b457650a299e590a864b490321d616ad5df88","first_computed_at":"2026-07-05T02:54:55.286452Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-07-05T02:54:55.286452Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"N5JiVsAgaqiu/0tpq04zxsxrHruRnBNaX/enjYQnHOrOXGyae96G7+maKywCe7gnq9oT9kEywXsSsqhdZNWzCA==","signature_status":"signed_v1","signed_at":"2026-07-05T02:54:55.286864Z","signed_message":"canonical_sha256_bytes"},"source_id":"2103.11811","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:5f449665da67bc36eea6c4af8c195c88c5551502ef66bb12b618831cdb69bd65","sha256:b6bdd951a9a5aba2da82d018b0b3a46ea801da5bb6b13b165eef45d5141b65cd"],"state_sha256":"98e355b5e02759290ea7fc40f95f05286e8c13410cbe3729619e7b6f57278964"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"/2zU8vgVmG/x1elcGa3UO7uUuKGMKwNwVK0E6xN5ccPmEdsEDR2QH+wfNcVh6NyQYMvviQIgBI1pwM+RflXbBQ==","signed_message":"bundle_sha256_bytes","signed_at":"2026-07-05T15:38:15.893456Z","bundle_sha256":"d2fc2322540f53b48d11ec1dcff61a2951e794697dac910392d6dbf92bec6002"}}