{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:YPXVKQQJR4Q5TYPTDXJ2G3LCMP","short_pith_number":"pith:YPXVKQQJ","schema_version":"1.0","canonical_sha256":"c3ef5542098f21d9e1f31dd3a36d6263f954b6e9229c52f3e5a232ec2ec50aec","source":{"kind":"arxiv","id":"2606.24460","version":1},"attestation_state":"computed","paper":{"title":"The African Language Tax: Quantifying the Cost, Latency, and Context Penalty of Tokenizing African Languages in Frontier LLMs","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.CL","authors_text":"Olaoye Anthony Somide","submitted_at":"2026-06-23T11:47:03Z","abstract_excerpt":"Commercial large language models bill, scale latency, and budget context per token. Yet tokenizers assign more subword tokens to the same meaning in some languages than in others, so speakers of languages with high token-fertility pay a structural penalty before a model is ever invoked. This penalty is documented for multilingual settings in general, but it has not been measured systematically for African languages at the level of enterprise deployment economics and cognitive context capacity. We measure it across 20 African languages spanning five language families and three scripts (Latin, G"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2606.24460","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-06-23T11:47:03Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"86f485a2006769baeb929a5e194f5d7caa5295c76e9152b48f5901562e7ce016","abstract_canon_sha256":"6e7d6487bdb21ab238eb94b3a6093250fa1164ec86b2f0461abfa4ec4acadbb7"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-24T01:15:31.018350Z","signature_b64":"ROa9HSSvRJVb+Mxaea1sVjW+MmwmI5z9goUX8gqbxuVxmm+nAJJ6h0dhl8OpewmadyjWbGHqR/+uc4PPpvakAg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"c3ef5542098f21d9e1f31dd3a36d6263f954b6e9229c52f3e5a232ec2ec50aec","last_reissued_at":"2026-06-24T01:15:31.017993Z","signature_status":"signed_v1","first_computed_at":"2026-06-24T01:15:31.017993Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"The African Language Tax: Quantifying the Cost, Latency, and Context Penalty of Tokenizing African Languages in Frontier LLMs","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.CL","authors_text":"Olaoye Anthony Somide","submitted_at":"2026-06-23T11:47:03Z","abstract_excerpt":"Commercial large language models bill, scale latency, and budget context per token. Yet tokenizers assign more subword tokens to the same meaning in some languages than in others, so speakers of languages with high token-fertility pay a structural penalty before a model is ever invoked. This penalty is documented for multilingual settings in general, but it has not been measured systematically for African languages at the level of enterprise deployment economics and cognitive context capacity. We measure it across 20 African languages spanning five language families and three scripts (Latin, G"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.24460","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2606.24460/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2606.24460","created_at":"2026-06-24T01:15:31.018054+00:00"},{"alias_kind":"arxiv_version","alias_value":"2606.24460v1","created_at":"2026-06-24T01:15:31.018054+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.24460","created_at":"2026-06-24T01:15:31.018054+00:00"},{"alias_kind":"pith_short_12","alias_value":"YPXVKQQJR4Q5","created_at":"2026-06-24T01:15:31.018054+00:00"},{"alias_kind":"pith_short_16","alias_value":"YPXVKQQJR4Q5TYPT","created_at":"2026-06-24T01:15:31.018054+00:00"},{"alias_kind":"pith_short_8","alias_value":"YPXVKQQJ","created_at":"2026-06-24T01:15:31.018054+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/YPXVKQQJR4Q5TYPTDXJ2G3LCMP","json":"https://pith.science/pith/YPXVKQQJR4Q5TYPTDXJ2G3LCMP.json","graph_json":"https://pith.science/api/pith-number/YPXVKQQJR4Q5TYPTDXJ2G3LCMP/graph.json","events_json":"https://pith.science/api/pith-number/YPXVKQQJR4Q5TYPTDXJ2G3LCMP/events.json","paper":"https://pith.science/paper/YPXVKQQJ"},"agent_actions":{"view_html":"https://pith.science/pith/YPXVKQQJR4Q5TYPTDXJ2G3LCMP","download_json":"https://pith.science/pith/YPXVKQQJR4Q5TYPTDXJ2G3LCMP.json","view_paper":"https://pith.science/paper/YPXVKQQJ","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2606.24460&json=true","fetch_graph":"https://pith.science/api/pith-number/YPXVKQQJR4Q5TYPTDXJ2G3LCMP/graph.json","fetch_events":"https://pith.science/api/pith-number/YPXVKQQJR4Q5TYPTDXJ2G3LCMP/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/YPXVKQQJR4Q5TYPTDXJ2G3LCMP/action/timestamp_anchor","attest_storage":"https://pith.science/pith/YPXVKQQJR4Q5TYPTDXJ2G3LCMP/action/storage_attestation","attest_author":"https://pith.science/pith/YPXVKQQJR4Q5TYPTDXJ2G3LCMP/action/author_attestation","sign_citation":"https://pith.science/pith/YPXVKQQJR4Q5TYPTDXJ2G3LCMP/action/citation_signature","submit_replication":"https://pith.science/pith/YPXVKQQJR4Q5TYPTDXJ2G3LCMP/action/replication_record"}},"created_at":"2026-06-24T01:15:31.018054+00:00","updated_at":"2026-06-24T01:15:31.018054+00:00"}