{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:FYJRYPBSKKSMWYQDXECSZT5JSD","short_pith_number":"pith:FYJRYPBS","canonical_record":{"source":{"id":"2606.08025","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2026-06-06T07:37:46Z","cross_cats_sorted":[],"title_canon_sha256":"52b9d62350a07a89d8bcc2b35b62b1b97e45029196cfc2b1fc4e6ea9ea6308ff","abstract_canon_sha256":"fa055e97d4f994385618f23505b7d963d586b2fed6fd9b65839835f494acd691"},"schema_version":"1.0"},"canonical_sha256":"2e131c3c3252a4cb6203b9052ccfa990cf17029b3230158a76b05c598fa6628c","source":{"kind":"arxiv","id":"2606.08025","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2606.08025","created_at":"2026-06-09T01:05:23Z"},{"alias_kind":"arxiv_version","alias_value":"2606.08025v1","created_at":"2026-06-09T01:05:23Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.08025","created_at":"2026-06-09T01:05:23Z"},{"alias_kind":"pith_short_12","alias_value":"FYJRYPBSKKSM","created_at":"2026-06-09T01:05:23Z"},{"alias_kind":"pith_short_16","alias_value":"FYJRYPBSKKSMWYQD","created_at":"2026-06-09T01:05:23Z"},{"alias_kind":"pith_short_8","alias_value":"FYJRYPBS","created_at":"2026-06-09T01:05:23Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:FYJRYPBSKKSMWYQDXECSZT5JSD","target":"record","payload":{"canonical_record":{"source":{"id":"2606.08025","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2026-06-06T07:37:46Z","cross_cats_sorted":[],"title_canon_sha256":"52b9d62350a07a89d8bcc2b35b62b1b97e45029196cfc2b1fc4e6ea9ea6308ff","abstract_canon_sha256":"fa055e97d4f994385618f23505b7d963d586b2fed6fd9b65839835f494acd691"},"schema_version":"1.0"},"canonical_sha256":"2e131c3c3252a4cb6203b9052ccfa990cf17029b3230158a76b05c598fa6628c","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-09T01:05:23.393680Z","signature_b64":"lyDjEzfwSin0m4oaNMqRM+IZxK9VrSPs4pjRYu20Qmp7nXr1YMklygS47qOV+PYyklQYBud/APfqhtposFMgCg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"2e131c3c3252a4cb6203b9052ccfa990cf17029b3230158a76b05c598fa6628c","last_reissued_at":"2026-06-09T01:05:23.393134Z","signature_status":"signed_v1","first_computed_at":"2026-06-09T01:05:23.393134Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2606.08025","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-09T01:05:23Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"tJXKtXIIACaf9ojn9rHhuznGBZZa21I1O3uCWHNASe8Lx242cRZsLtW8c/v8ai5B+fTio+gSlB1z2y5G143bBA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-29T18:43:16.386671Z"},"content_sha256":"41c04579953e9efea0d4b3eac738cd51225bad3c5046985736dada55c5090694","schema_version":"1.0","event_id":"sha256:41c04579953e9efea0d4b3eac738cd51225bad3c5046985736dada55c5090694"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:FYJRYPBSKKSMWYQDXECSZT5JSD","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Arabic Sentence Segmentation Across Genres and Punctuation Conditions","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Bashar Alhafni, Khalid N. Elmadani, Mohammed Elkholy, Nizar Habash","submitted_at":"2026-06-06T07:37:46Z","abstract_excerpt":"Sentence segmentation in Arabic is challenging due to ambiguous and inconsistent punctuation, with many texts lacking reliable sentence boundary markers. Existing approaches rely heavily on punctuation cues and are typically evaluated on well-formed text, limiting their robustness in realistic Arabic settings. To address this, we introduce AraSEG, a genre-diverse sentence segmentation corpus spanning eight genres and a wide range of punctuation and document structure conditions. Using AraSEG, we evaluate LLMs, lightweight encoder models, and dependency parser-based models under increasingly ch"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.08025","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2606.08025/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-09T01:05:23Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"Lr8cEjV4Cf6Unqb85SMh+xXbtXPc25bB0DrVd9MZMWDmOHO5zG/3Tiaexp80HGyrIf7noxRCVWBYdnjVr4gXDA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-29T18:43:16.387038Z"},"content_sha256":"b435daedc991ef1a608f9ee8a01a29b631970e03acc56a820b58239973557370","schema_version":"1.0","event_id":"sha256:b435daedc991ef1a608f9ee8a01a29b631970e03acc56a820b58239973557370"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/FYJRYPBSKKSMWYQDXECSZT5JSD/bundle.json","state_url":"https://pith.science/pith/FYJRYPBSKKSMWYQDXECSZT5JSD/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/FYJRYPBSKKSMWYQDXECSZT5JSD/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-29T18:43:16Z","links":{"resolver":"https://pith.science/pith/FYJRYPBSKKSMWYQDXECSZT5JSD","bundle":"https://pith.science/pith/FYJRYPBSKKSMWYQDXECSZT5JSD/bundle.json","state":"https://pith.science/pith/FYJRYPBSKKSMWYQDXECSZT5JSD/state.json","well_known_bundle":"https://pith.science/.well-known/pith/FYJRYPBSKKSMWYQDXECSZT5JSD/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:FYJRYPBSKKSMWYQDXECSZT5JSD","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"fa055e97d4f994385618f23505b7d963d586b2fed6fd9b65839835f494acd691","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2026-06-06T07:37:46Z","title_canon_sha256":"52b9d62350a07a89d8bcc2b35b62b1b97e45029196cfc2b1fc4e6ea9ea6308ff"},"schema_version":"1.0","source":{"id":"2606.08025","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2606.08025","created_at":"2026-06-09T01:05:23Z"},{"alias_kind":"arxiv_version","alias_value":"2606.08025v1","created_at":"2026-06-09T01:05:23Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.08025","created_at":"2026-06-09T01:05:23Z"},{"alias_kind":"pith_short_12","alias_value":"FYJRYPBSKKSM","created_at":"2026-06-09T01:05:23Z"},{"alias_kind":"pith_short_16","alias_value":"FYJRYPBSKKSMWYQD","created_at":"2026-06-09T01:05:23Z"},{"alias_kind":"pith_short_8","alias_value":"FYJRYPBS","created_at":"2026-06-09T01:05:23Z"}],"graph_snapshots":[{"event_id":"sha256:b435daedc991ef1a608f9ee8a01a29b631970e03acc56a820b58239973557370","target":"graph","created_at":"2026-06-09T01:05:23Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2606.08025/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Sentence segmentation in Arabic is challenging due to ambiguous and inconsistent punctuation, with many texts lacking reliable sentence boundary markers. Existing approaches rely heavily on punctuation cues and are typically evaluated on well-formed text, limiting their robustness in realistic Arabic settings. To address this, we introduce AraSEG, a genre-diverse sentence segmentation corpus spanning eight genres and a wide range of punctuation and document structure conditions. Using AraSEG, we evaluate LLMs, lightweight encoder models, and dependency parser-based models under increasingly ch","authors_text":"Bashar Alhafni, Khalid N. Elmadani, Mohammed Elkholy, Nizar Habash","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2026-06-06T07:37:46Z","title":"Arabic Sentence Segmentation Across Genres and Punctuation Conditions"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.08025","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:41c04579953e9efea0d4b3eac738cd51225bad3c5046985736dada55c5090694","target":"record","created_at":"2026-06-09T01:05:23Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"fa055e97d4f994385618f23505b7d963d586b2fed6fd9b65839835f494acd691","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2026-06-06T07:37:46Z","title_canon_sha256":"52b9d62350a07a89d8bcc2b35b62b1b97e45029196cfc2b1fc4e6ea9ea6308ff"},"schema_version":"1.0","source":{"id":"2606.08025","kind":"arxiv","version":1}},"canonical_sha256":"2e131c3c3252a4cb6203b9052ccfa990cf17029b3230158a76b05c598fa6628c","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"2e131c3c3252a4cb6203b9052ccfa990cf17029b3230158a76b05c598fa6628c","first_computed_at":"2026-06-09T01:05:23.393134Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-06-09T01:05:23.393134Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"lyDjEzfwSin0m4oaNMqRM+IZxK9VrSPs4pjRYu20Qmp7nXr1YMklygS47qOV+PYyklQYBud/APfqhtposFMgCg==","signature_status":"signed_v1","signed_at":"2026-06-09T01:05:23.393680Z","signed_message":"canonical_sha256_bytes"},"source_id":"2606.08025","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:41c04579953e9efea0d4b3eac738cd51225bad3c5046985736dada55c5090694","sha256:b435daedc991ef1a608f9ee8a01a29b631970e03acc56a820b58239973557370"],"state_sha256":"3e106a2f299bd2ba9b3dc860edaad49ee178ffd7ec12b6ac0c45ec1eb8d07ae5"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"nH67S/shalzxIPelHKEQfRLMwvJl7FQqk5nD3scpwnInhcCUmw0E8gsexxynqgePxbyBZeqOGhPW9vqBthYFAQ==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-29T18:43:16.388984Z","bundle_sha256":"59bf609d81b3164596a78103d2ff64de556f69e05eb33662df4057583846cdf9"}}