{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2018:VPTEXIDL7GDQZ3ACCMY2D3RP7M","short_pith_number":"pith:VPTEXIDL","canonical_record":{"source":{"id":"1809.03891","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2018-09-11T13:44:48Z","cross_cats_sorted":[],"title_canon_sha256":"2e1e46e52e1cd5baf3ad373efb98c2acdac17af5525042a8d4e620542ff6cd64","abstract_canon_sha256":"6e00b37b76f782aed562ab468cb2c89617a8c3dc12845d6dcf400c0a839a8d5a"},"schema_version":"1.0"},"canonical_sha256":"abe64ba06bf9870cec021331a1ee2ffb30690c399d3933b2be6c6d76a712c0c2","source":{"kind":"arxiv","id":"1809.03891","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1809.03891","created_at":"2026-05-18T00:05:57Z"},{"alias_kind":"arxiv_version","alias_value":"1809.03891v1","created_at":"2026-05-18T00:05:57Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1809.03891","created_at":"2026-05-18T00:05:57Z"},{"alias_kind":"pith_short_12","alias_value":"VPTEXIDL7GDQ","created_at":"2026-05-18T12:32:59Z"},{"alias_kind":"pith_short_16","alias_value":"VPTEXIDL7GDQZ3AC","created_at":"2026-05-18T12:32:59Z"},{"alias_kind":"pith_short_8","alias_value":"VPTEXIDL","created_at":"2026-05-18T12:32:59Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2018:VPTEXIDL7GDQZ3ACCMY2D3RP7M","target":"record","payload":{"canonical_record":{"source":{"id":"1809.03891","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2018-09-11T13:44:48Z","cross_cats_sorted":[],"title_canon_sha256":"2e1e46e52e1cd5baf3ad373efb98c2acdac17af5525042a8d4e620542ff6cd64","abstract_canon_sha256":"6e00b37b76f782aed562ab468cb2c89617a8c3dc12845d6dcf400c0a839a8d5a"},"schema_version":"1.0"},"canonical_sha256":"abe64ba06bf9870cec021331a1ee2ffb30690c399d3933b2be6c6d76a712c0c2","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:05:57.798261Z","signature_b64":"3uvm5fkdcrMUFwS/G70hvRhNLI8DzzreENn1tYjRHOrVKyrTScaYSg4QRZVfzg718vjZ1zS4w3PbQyFbt34AAA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"abe64ba06bf9870cec021331a1ee2ffb30690c399d3933b2be6c6d76a712c0c2","last_reissued_at":"2026-05-18T00:05:57.797722Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:05:57.797722Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1809.03891","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:05:57Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"DrM3FSYLDVjzARXaH2lwoAJ8GF/46L2w+7csDwKMHHq3VASbFi4/lgr2aKoZ/lHwmRbjhIr3vRqhfasiXIf9CA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-01T17:02:01.528872Z"},"content_sha256":"b8da90b073329782d3aa12ab6fecb2aa6c22169e5cb79a60f8d41c6ea2d1826c","schema_version":"1.0","event_id":"sha256:b8da90b073329782d3aa12ab6fecb2aa6c22169e5cb79a60f8d41c6ea2d1826c"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2018:VPTEXIDL7GDQZ3ACCMY2D3RP7M","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Studying the History of the Arabic Language: Language Technology and a Large-Scale Historical Corpus","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Alberto Barr\\'on-Cede\\~no, Alexander Magidow, Avi Shmidman, Maxim Romanov, Yonatan Belinkov","submitted_at":"2018-09-11T13:44:48Z","abstract_excerpt":"Arabic is a widely-spoken language with a long and rich history, but existing corpora and language technology focus mostly on modern Arabic and its varieties. Therefore, studying the history of the language has so far been mostly limited to manual analyses on a small scale. In this work, we present a large-scale historical corpus of the written Arabic language, spanning 1400 years. We describe our efforts to clean and process this corpus using Arabic NLP tools, including the identification of reused text. We study the history of the Arabic language using a novel automatic periodization algorit"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1809.03891","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:05:57Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"B4ziQqC/W/jrcHGF+ZqKH+0ihk4TMcQhrsN5RNLBi+FJ6ghCH/aZUGY4jLZn2mXLvxRGmZ1IwpNHoxFyrCVYAw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-01T17:02:01.529415Z"},"content_sha256":"55191c166d33b148107aadfdcb2a13c694fee86e2c14090471e4b3c477e36542","schema_version":"1.0","event_id":"sha256:55191c166d33b148107aadfdcb2a13c694fee86e2c14090471e4b3c477e36542"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/VPTEXIDL7GDQZ3ACCMY2D3RP7M/bundle.json","state_url":"https://pith.science/pith/VPTEXIDL7GDQZ3ACCMY2D3RP7M/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/VPTEXIDL7GDQZ3ACCMY2D3RP7M/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-01T17:02:01Z","links":{"resolver":"https://pith.science/pith/VPTEXIDL7GDQZ3ACCMY2D3RP7M","bundle":"https://pith.science/pith/VPTEXIDL7GDQZ3ACCMY2D3RP7M/bundle.json","state":"https://pith.science/pith/VPTEXIDL7GDQZ3ACCMY2D3RP7M/state.json","well_known_bundle":"https://pith.science/.well-known/pith/VPTEXIDL7GDQZ3ACCMY2D3RP7M/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2018:VPTEXIDL7GDQZ3ACCMY2D3RP7M","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"6e00b37b76f782aed562ab468cb2c89617a8c3dc12845d6dcf400c0a839a8d5a","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2018-09-11T13:44:48Z","title_canon_sha256":"2e1e46e52e1cd5baf3ad373efb98c2acdac17af5525042a8d4e620542ff6cd64"},"schema_version":"1.0","source":{"id":"1809.03891","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1809.03891","created_at":"2026-05-18T00:05:57Z"},{"alias_kind":"arxiv_version","alias_value":"1809.03891v1","created_at":"2026-05-18T00:05:57Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1809.03891","created_at":"2026-05-18T00:05:57Z"},{"alias_kind":"pith_short_12","alias_value":"VPTEXIDL7GDQ","created_at":"2026-05-18T12:32:59Z"},{"alias_kind":"pith_short_16","alias_value":"VPTEXIDL7GDQZ3AC","created_at":"2026-05-18T12:32:59Z"},{"alias_kind":"pith_short_8","alias_value":"VPTEXIDL","created_at":"2026-05-18T12:32:59Z"}],"graph_snapshots":[{"event_id":"sha256:55191c166d33b148107aadfdcb2a13c694fee86e2c14090471e4b3c477e36542","target":"graph","created_at":"2026-05-18T00:05:57Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Arabic is a widely-spoken language with a long and rich history, but existing corpora and language technology focus mostly on modern Arabic and its varieties. Therefore, studying the history of the language has so far been mostly limited to manual analyses on a small scale. In this work, we present a large-scale historical corpus of the written Arabic language, spanning 1400 years. We describe our efforts to clean and process this corpus using Arabic NLP tools, including the identification of reused text. We study the history of the Arabic language using a novel automatic periodization algorit","authors_text":"Alberto Barr\\'on-Cede\\~no, Alexander Magidow, Avi Shmidman, Maxim Romanov, Yonatan Belinkov","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2018-09-11T13:44:48Z","title":"Studying the History of the Arabic Language: Language Technology and a Large-Scale Historical Corpus"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1809.03891","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:b8da90b073329782d3aa12ab6fecb2aa6c22169e5cb79a60f8d41c6ea2d1826c","target":"record","created_at":"2026-05-18T00:05:57Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"6e00b37b76f782aed562ab468cb2c89617a8c3dc12845d6dcf400c0a839a8d5a","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2018-09-11T13:44:48Z","title_canon_sha256":"2e1e46e52e1cd5baf3ad373efb98c2acdac17af5525042a8d4e620542ff6cd64"},"schema_version":"1.0","source":{"id":"1809.03891","kind":"arxiv","version":1}},"canonical_sha256":"abe64ba06bf9870cec021331a1ee2ffb30690c399d3933b2be6c6d76a712c0c2","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"abe64ba06bf9870cec021331a1ee2ffb30690c399d3933b2be6c6d76a712c0c2","first_computed_at":"2026-05-18T00:05:57.797722Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:05:57.797722Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"3uvm5fkdcrMUFwS/G70hvRhNLI8DzzreENn1tYjRHOrVKyrTScaYSg4QRZVfzg718vjZ1zS4w3PbQyFbt34AAA==","signature_status":"signed_v1","signed_at":"2026-05-18T00:05:57.798261Z","signed_message":"canonical_sha256_bytes"},"source_id":"1809.03891","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:b8da90b073329782d3aa12ab6fecb2aa6c22169e5cb79a60f8d41c6ea2d1826c","sha256:55191c166d33b148107aadfdcb2a13c694fee86e2c14090471e4b3c477e36542"],"state_sha256":"1c43dc3137def25860d8cfea7f060d662c99d3b9909d8b0101850ff48ffc2341"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"mZ+jcUov7uTo248CRFIszgrmb4kWjdN22Nnpc5TaVVw/rvJPVOnOykZwf6548QNcs/h/rkotGtvaML/SwaV8DQ==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-01T17:02:01.531930Z","bundle_sha256":"c0f03fc911f9280ef0a6b8b5becceda7bc37378bbfe581e59f49e5311b1d0e77"}}