{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2022:YP2CQD75IPROXYPCSTA473GRPG","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"efe8a5b44ce80309ea79f62a937d4d112d121dba2af0bf1b0cd1fedebaec0872","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2022-05-21T22:38:19Z","title_canon_sha256":"3e47267e781dcfd3ac8a61b0042ef1bc181b3331a6a3182f44c83ddfe54da409"},"schema_version":"1.0","source":{"id":"2205.10687","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2205.10687","created_at":"2026-07-05T04:25:22Z"},{"alias_kind":"arxiv_version","alias_value":"2205.10687v1","created_at":"2026-07-05T04:25:22Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2205.10687","created_at":"2026-07-05T04:25:22Z"},{"alias_kind":"pith_short_12","alias_value":"YP2CQD75IPRO","created_at":"2026-07-05T04:25:22Z"},{"alias_kind":"pith_short_16","alias_value":"YP2CQD75IPROXYPC","created_at":"2026-07-05T04:25:22Z"},{"alias_kind":"pith_short_8","alias_value":"YP2CQD75","created_at":"2026-07-05T04:25:22Z"}],"graph_snapshots":[{"event_id":"sha256:a77a2ea9dc9961cce89be51c3d7d2ccb2502b5d53b72b11e9020dac280763875","target":"graph","created_at":"2026-07-05T04:25:22Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2205.10687/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"There is a growing body of work in recent years to develop pre-trained language models (PLMs) for the Arabic language. This work concerns addressing two major problems in existing Arabic PLMs which constraint progress of the Arabic NLU and NLG fields.First, existing Arabic PLMs are not well-explored and their pre-trainig can be improved significantly using a more methodical approach. Second, there is a lack of systematic and reproducible evaluation of these models in the literature. In this work, we revisit both the pre-training and evaluation of Arabic PLMs. In terms of pre-training, we explo","authors_text":"Abbas Ghaddar, Ahmad Rashid, Baoxing Huai, Chao Xing, Duan Xinyu, Khalil Bibi, Mehdi Rezagholizadeh, Philippe Langlais, Qun Liu, Sunyam Bagga, Xin Jiang, Yasheng Wang, Yimeng Wu, Zhefeng Wang","cross_cats":[],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2022-05-21T22:38:19Z","title":"Revisiting Pre-trained Language Models and their Evaluation for Arabic Natural Language Understanding"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2205.10687","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:2c43bccb05b2bfe28df54459b1c4190d1959c1813617caf5fa9f61a8090f83cb","target":"record","created_at":"2026-07-05T04:25:22Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"efe8a5b44ce80309ea79f62a937d4d112d121dba2af0bf1b0cd1fedebaec0872","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2022-05-21T22:38:19Z","title_canon_sha256":"3e47267e781dcfd3ac8a61b0042ef1bc181b3331a6a3182f44c83ddfe54da409"},"schema_version":"1.0","source":{"id":"2205.10687","kind":"arxiv","version":1}},"canonical_sha256":"c3f4280ffd43e2ebe1e294c1cfecd179a8b57ae6b7a4e0c6f9bbe740bdfc752b","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"c3f4280ffd43e2ebe1e294c1cfecd179a8b57ae6b7a4e0c6f9bbe740bdfc752b","first_computed_at":"2026-07-05T04:25:22.698114Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-07-05T04:25:22.698114Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"h3AKn3tNQa+GfgKzp0HY+76Lk/yppTQeuD6VZwXgD5hKZIBAW1Bb8G4TbAsujq4a5BS/xCXluPPPYjbGmAGKBA==","signature_status":"signed_v1","signed_at":"2026-07-05T04:25:22.698568Z","signed_message":"canonical_sha256_bytes"},"source_id":"2205.10687","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:2c43bccb05b2bfe28df54459b1c4190d1959c1813617caf5fa9f61a8090f83cb","sha256:a77a2ea9dc9961cce89be51c3d7d2ccb2502b5d53b72b11e9020dac280763875"],"state_sha256":"fb128e62a1c992aa3ee57ddbe3bea3948cf71d7f89d81ec28d14cee38ba549c7"}