{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:GCFW5R6RZMFUHBHBO6LONKFTBC","short_pith_number":"pith:GCFW5R6R","schema_version":"1.0","canonical_sha256":"308b6ec7d1cb0b4384e17796e6a8b308a4552ec77ff2b87735b0f39087c94629","source":{"kind":"arxiv","id":"2606.12708","version":1},"attestation_state":"computed","paper":{"title":"AfriSUD: A Dependency Treebank Collection for Evaluating Models on African Languages","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.CL","authors_text":"Andiswa Bukula, Aremu Anuoluwapo, Bruno Guillaume, Cheikh Mouhamadou Bamba Dione, Chinedu Uchechukwu, Christiane Fellbaum, David Ifeoluwa Adelani, David Sabiiti Bamutura, Happy Buzaaba, Idris Akinade, Kevin Guan, Kim Gerdes, Naome A. Etori, Peter Nabende, Rooweither Mabuya, Shamsuddeen Hassan Muhammad, Sylvain Kahane, Utitofon Inyang","submitted_at":"2026-06-10T21:55:02Z","abstract_excerpt":"Despite their linguistic diversity and global significance, African languages remain underrepresented in research and resources to support NLP. We aim to bridge this gap by introducing AfriSUD, the first large-scale collection of syntactically annotated treebanks for nine diverse African languages spanning major language families and regions across Sub-Saharan Africa. Using the Surface-Syntactic Universal Dependencies (SUD) framework, our community-led effort provides high-quality, native-speaker verified data that capture typological key features such as agglutination and tone. We evaluate a "},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2606.12708","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-06-10T21:55:02Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"ae8ba5e28d68509bf659ab64c09b14dc2eb676da44e68c733cd4e5d381558e0d","abstract_canon_sha256":"a804c3e14966f362d7514e08ad6ea324d1e997dffbe2cb4d2b920b2b74279ec9"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-12T01:08:46.670516Z","signature_b64":"rehbA45NS1rvOzC8QzAcdLdT3bI7rHa7zSOTM2LDhKZMzlKxFCM6iuqCgJA4jTh1lvFGDWmkPAp6kyawmN2MBw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"308b6ec7d1cb0b4384e17796e6a8b308a4552ec77ff2b87735b0f39087c94629","last_reissued_at":"2026-06-12T01:08:46.669624Z","signature_status":"signed_v1","first_computed_at":"2026-06-12T01:08:46.669624Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"AfriSUD: A Dependency Treebank Collection for Evaluating Models on African Languages","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.CL","authors_text":"Andiswa Bukula, Aremu Anuoluwapo, Bruno Guillaume, Cheikh Mouhamadou Bamba Dione, Chinedu Uchechukwu, Christiane Fellbaum, David Ifeoluwa Adelani, David Sabiiti Bamutura, Happy Buzaaba, Idris Akinade, Kevin Guan, Kim Gerdes, Naome A. Etori, Peter Nabende, Rooweither Mabuya, Shamsuddeen Hassan Muhammad, Sylvain Kahane, Utitofon Inyang","submitted_at":"2026-06-10T21:55:02Z","abstract_excerpt":"Despite their linguistic diversity and global significance, African languages remain underrepresented in research and resources to support NLP. We aim to bridge this gap by introducing AfriSUD, the first large-scale collection of syntactically annotated treebanks for nine diverse African languages spanning major language families and regions across Sub-Saharan Africa. Using the Surface-Syntactic Universal Dependencies (SUD) framework, our community-led effort provides high-quality, native-speaker verified data that capture typological key features such as agglutination and tone. We evaluate a "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.12708","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2606.12708/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2606.12708","created_at":"2026-06-12T01:08:46.669763+00:00"},{"alias_kind":"arxiv_version","alias_value":"2606.12708v1","created_at":"2026-06-12T01:08:46.669763+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.12708","created_at":"2026-06-12T01:08:46.669763+00:00"},{"alias_kind":"pith_short_12","alias_value":"GCFW5R6RZMFU","created_at":"2026-06-12T01:08:46.669763+00:00"},{"alias_kind":"pith_short_16","alias_value":"GCFW5R6RZMFUHBHB","created_at":"2026-06-12T01:08:46.669763+00:00"},{"alias_kind":"pith_short_8","alias_value":"GCFW5R6R","created_at":"2026-06-12T01:08:46.669763+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/GCFW5R6RZMFUHBHBO6LONKFTBC","json":"https://pith.science/pith/GCFW5R6RZMFUHBHBO6LONKFTBC.json","graph_json":"https://pith.science/api/pith-number/GCFW5R6RZMFUHBHBO6LONKFTBC/graph.json","events_json":"https://pith.science/api/pith-number/GCFW5R6RZMFUHBHBO6LONKFTBC/events.json","paper":"https://pith.science/paper/GCFW5R6R"},"agent_actions":{"view_html":"https://pith.science/pith/GCFW5R6RZMFUHBHBO6LONKFTBC","download_json":"https://pith.science/pith/GCFW5R6RZMFUHBHBO6LONKFTBC.json","view_paper":"https://pith.science/paper/GCFW5R6R","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2606.12708&json=true","fetch_graph":"https://pith.science/api/pith-number/GCFW5R6RZMFUHBHBO6LONKFTBC/graph.json","fetch_events":"https://pith.science/api/pith-number/GCFW5R6RZMFUHBHBO6LONKFTBC/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/GCFW5R6RZMFUHBHBO6LONKFTBC/action/timestamp_anchor","attest_storage":"https://pith.science/pith/GCFW5R6RZMFUHBHBO6LONKFTBC/action/storage_attestation","attest_author":"https://pith.science/pith/GCFW5R6RZMFUHBHBO6LONKFTBC/action/author_attestation","sign_citation":"https://pith.science/pith/GCFW5R6RZMFUHBHBO6LONKFTBC/action/citation_signature","submit_replication":"https://pith.science/pith/GCFW5R6RZMFUHBHBO6LONKFTBC/action/replication_record"}},"created_at":"2026-06-12T01:08:46.669763+00:00","updated_at":"2026-06-12T01:08:46.669763+00:00"}