{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:ZZE27TKB5BJVIW52Z4ZK4GUXN6","short_pith_number":"pith:ZZE27TKB","schema_version":"1.0","canonical_sha256":"ce49afcd41e853545bbacf32ae1a976f90f6a03056ce20f5ff790dd4003453cb","source":{"kind":"arxiv","id":"2604.20996","version":2},"attestation_state":"computed","paper":{"title":"AFRILANGTUTOR: Advancing Language Tutoring and Culture Education in Low-Resource Languages with Large Language Models","license":"http://creativecommons.org/licenses/by/4.0/","headline":"Fine-tuning LLMs on dictionary-derived African tutoring data produces consistent gains over base models.","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Anshuman Chhabra, Chris Biemann, Israel Abebe Azime, Marek Rei, Ocean Monjur, Seid Muhie Yimam, Shahriar Kabir Nahin, Shamsuddeen Hassan Muhammad, Tadesse Destaw Belay","submitted_at":"2026-04-22T18:38:04Z","abstract_excerpt":"How can language learning systems be developed for languages that lack sufficient training resources? This challenge is increasingly faced by developers across the African continent who aim to build AI systems capable of understanding and responding in local languages. To address this gap, we introduce AFRILANGDICT, a collection of 194.7K African language-English dictionary entries designed as seed resources for generating language-learning materials, enabling us to automatically construct large-scale, diverse, and verifiable student-tutor question-answer interactions suitable for training AI-"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2604.20996","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-04-22T18:38:04Z","cross_cats_sorted":[],"title_canon_sha256":"16f0340ec2cdef08d50dbf227e24dc8e2b0bf01c52016f2954441f638433ff34","abstract_canon_sha256":"da1066b06a0598bf18585fcf2d6239849436f69de5ff177dcc83bed298fd73ce"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-28T01:04:08.537548Z","signature_b64":"qvlx4PHVwv9k9mbnVU7Q0X+0OcysuFDqPsScgoQVSlGPSrOdy9R6KY8tpwNHXcrjw5wjztRvEhSjCTBO5xv7Ag==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"ce49afcd41e853545bbacf32ae1a976f90f6a03056ce20f5ff790dd4003453cb","last_reissued_at":"2026-05-28T01:04:08.536995Z","signature_status":"signed_v1","first_computed_at":"2026-05-28T01:04:08.536995Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"AFRILANGTUTOR: Advancing Language Tutoring and Culture Education in Low-Resource Languages with Large Language Models","license":"http://creativecommons.org/licenses/by/4.0/","headline":"Fine-tuning LLMs on dictionary-derived African tutoring data produces consistent gains over base models.","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Anshuman Chhabra, Chris Biemann, Israel Abebe Azime, Marek Rei, Ocean Monjur, Seid Muhie Yimam, Shahriar Kabir Nahin, Shamsuddeen Hassan Muhammad, Tadesse Destaw Belay","submitted_at":"2026-04-22T18:38:04Z","abstract_excerpt":"How can language learning systems be developed for languages that lack sufficient training resources? This challenge is increasingly faced by developers across the African continent who aim to build AI systems capable of understanding and responding in local languages. To address this gap, we introduce AFRILANGDICT, a collection of 194.7K African language-English dictionary entries designed as seed resources for generating language-learning materials, enabling us to automatically construct large-scale, diverse, and verifiable student-tutor question-answer interactions suitable for training AI-"},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"Models trained on AFRILANGEDU consistently outperform their base counterparts, and combining SFT and DPO yields substantial improvements, with gains ranging from 1.8% to 15.5% under LLM-as-a-judge evaluations across four criteria.","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"The LLM-as-a-judge evaluation reliably measures tutoring quality for low-resource languages; the paper provides no human validation of the judge's scores or of the automatically generated question-answer pairs.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"New dictionary-derived datasets enable fine-tuned LLMs to act as language tutors for ten low-resource African languages, with SFT plus DPO yielding 1.8-15.5% gains on LLM-as-judge metrics.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"Fine-tuning LLMs on dictionary-derived African tutoring data produces consistent gains over base models.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"0b5df3ecddacfe92cfe52f4f653e9199e882f131b789d8808acd24652bbe40b3"},"source":{"id":"2604.20996","kind":"arxiv","version":2},"verdict":{"id":"dba8ac45-c0a6-43f8-ada8-d767ec118331","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-10T00:32:23.657862Z","strongest_claim":"Models trained on AFRILANGEDU consistently outperform their base counterparts, and combining SFT and DPO yields substantial improvements, with gains ranging from 1.8% to 15.5% under LLM-as-a-judge evaluations across four criteria.","one_line_summary":"New dictionary-derived datasets enable fine-tuned LLMs to act as language tutors for ten low-resource African languages, with SFT plus DPO yielding 1.8-15.5% gains on LLM-as-judge metrics.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"The LLM-as-a-judge evaluation reliably measures tutoring quality for low-resource languages; the paper provides no human validation of the judge's scores or of the automatically generated question-answer pairs.","pith_extraction_headline":"Fine-tuning LLMs on dictionary-derived African tutoring data produces consistent gains over base models."},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2604.20996/integrity.json","findings":[],"available":true,"detectors_run":[{"name":"ai_meta_artifact","ran_at":"2026-05-21T13:39:19.683442Z","status":"completed","version":"1.0.0","findings_count":0},{"name":"doi_compliance","ran_at":"2026-05-20T01:29:39.916452Z","status":"completed","version":"1.0.0","findings_count":0}],"snapshot_sha256":"f7117827d992a4a7a1dfc7510ebd79cee07b1cb61e1c94063781b2cc0fd81fca"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2604.20996","created_at":"2026-05-28T01:04:08.537053+00:00"},{"alias_kind":"arxiv_version","alias_value":"2604.20996v2","created_at":"2026-05-28T01:04:08.537053+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2604.20996","created_at":"2026-05-28T01:04:08.537053+00:00"},{"alias_kind":"pith_short_12","alias_value":"ZZE27TKB5BJV","created_at":"2026-05-28T01:04:08.537053+00:00"},{"alias_kind":"pith_short_16","alias_value":"ZZE27TKB5BJVIW52","created_at":"2026-05-28T01:04:08.537053+00:00"},{"alias_kind":"pith_short_8","alias_value":"ZZE27TKB","created_at":"2026-05-28T01:04:08.537053+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/ZZE27TKB5BJVIW52Z4ZK4GUXN6","json":"https://pith.science/pith/ZZE27TKB5BJVIW52Z4ZK4GUXN6.json","graph_json":"https://pith.science/api/pith-number/ZZE27TKB5BJVIW52Z4ZK4GUXN6/graph.json","events_json":"https://pith.science/api/pith-number/ZZE27TKB5BJVIW52Z4ZK4GUXN6/events.json","paper":"https://pith.science/paper/ZZE27TKB"},"agent_actions":{"view_html":"https://pith.science/pith/ZZE27TKB5BJVIW52Z4ZK4GUXN6","download_json":"https://pith.science/pith/ZZE27TKB5BJVIW52Z4ZK4GUXN6.json","view_paper":"https://pith.science/paper/ZZE27TKB","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2604.20996&json=true","fetch_graph":"https://pith.science/api/pith-number/ZZE27TKB5BJVIW52Z4ZK4GUXN6/graph.json","fetch_events":"https://pith.science/api/pith-number/ZZE27TKB5BJVIW52Z4ZK4GUXN6/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/ZZE27TKB5BJVIW52Z4ZK4GUXN6/action/timestamp_anchor","attest_storage":"https://pith.science/pith/ZZE27TKB5BJVIW52Z4ZK4GUXN6/action/storage_attestation","attest_author":"https://pith.science/pith/ZZE27TKB5BJVIW52Z4ZK4GUXN6/action/author_attestation","sign_citation":"https://pith.science/pith/ZZE27TKB5BJVIW52Z4ZK4GUXN6/action/citation_signature","submit_replication":"https://pith.science/pith/ZZE27TKB5BJVIW52Z4ZK4GUXN6/action/replication_record"}},"created_at":"2026-05-28T01:04:08.537053+00:00","updated_at":"2026-05-28T01:04:08.537053+00:00"}