{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2018:T236F6P3SZUUCUZY7S5URMHSU3","short_pith_number":"pith:T236F6P3","canonical_record":{"source":{"id":"1807.02974","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2018-07-09T07:51:51Z","cross_cats_sorted":[],"title_canon_sha256":"a220b963be9c0d3a096a7b666cb21b834bae86e19a534a40333f15e475af9dfa","abstract_canon_sha256":"2a272dc31010c7494b54884387b1d4b8b1f3da1205a56fea7596570b603e7bf4"},"schema_version":"1.0"},"canonical_sha256":"9eb7e2f9fb9669415338fcbb48b0f2a6fc5efbb02a7fb5591b2c54f9ea23a0a8","source":{"kind":"arxiv","id":"1807.02974","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1807.02974","created_at":"2026-05-18T00:11:13Z"},{"alias_kind":"arxiv_version","alias_value":"1807.02974v1","created_at":"2026-05-18T00:11:13Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1807.02974","created_at":"2026-05-18T00:11:13Z"},{"alias_kind":"pith_short_12","alias_value":"T236F6P3SZUU","created_at":"2026-05-18T12:32:53Z"},{"alias_kind":"pith_short_16","alias_value":"T236F6P3SZUUCUZY","created_at":"2026-05-18T12:32:53Z"},{"alias_kind":"pith_short_8","alias_value":"T236F6P3","created_at":"2026-05-18T12:32:53Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2018:T236F6P3SZUUCUZY7S5URMHSU3","target":"record","payload":{"canonical_record":{"source":{"id":"1807.02974","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2018-07-09T07:51:51Z","cross_cats_sorted":[],"title_canon_sha256":"a220b963be9c0d3a096a7b666cb21b834bae86e19a534a40333f15e475af9dfa","abstract_canon_sha256":"2a272dc31010c7494b54884387b1d4b8b1f3da1205a56fea7596570b603e7bf4"},"schema_version":"1.0"},"canonical_sha256":"9eb7e2f9fb9669415338fcbb48b0f2a6fc5efbb02a7fb5591b2c54f9ea23a0a8","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:11:13.163802Z","signature_b64":"MCSEuBp5qj8YkzO7Q+873zsU6IFGFhW4dUBbo5T4VgM4xoIzBgyAuNnaHrSEtyDjJ6JW7XKw6dySNwc/ZZzsAg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"9eb7e2f9fb9669415338fcbb48b0f2a6fc5efbb02a7fb5591b2c54f9ea23a0a8","last_reissued_at":"2026-05-18T00:11:13.163074Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:11:13.163074Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1807.02974","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:11:13Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"lgv0SZP3j7u1Ox4IZx5CVqOFZ5Zj85TuTY4nYq9dH9tkBBfyHUYtbFw4CoFrzkUwta45L0iUkO1966vNLkEHDA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-02T22:23:53.125072Z"},"content_sha256":"8432dc5b872bf362da6b1d78fb6276f9d7a3382b0b01991f78f694de460521dc","schema_version":"1.0","event_id":"sha256:8432dc5b872bf362da6b1d78fb6276f9d7a3382b0b01991f78f694de460521dc"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2018:T236F6P3SZUUCUZY7S5URMHSU3","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Universal Word Segmentation: Implementation and Interpretation","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Christian Hardmeier, Joakim Nivre, Yan Shao","submitted_at":"2018-07-09T07:51:51Z","abstract_excerpt":"Word segmentation is a low-level NLP task that is non-trivial for a considerable number of languages. In this paper, we present a sequence tagging framework and apply it to word segmentation for a wide range of languages with different writing systems and typological characteristics. Additionally, we investigate the correlations between various typological factors and word segmentation accuracy. The experimental results indicate that segmentation accuracy is positively related to word boundary markers and negatively to the number of unique non-segmental terms. Based on the analysis, we design "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1807.02974","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:11:13Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"//UVBoECscqD8U2BKiYqYtZLcSMVMMRoYErtZIrW67sB2+XqSA6tGUaMY8H2cb5f9wGCOen6tTHP+qTxBiQpBw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-02T22:23:53.125433Z"},"content_sha256":"62772602d2d0e4b6b9f3d661de6ca4bb710bdbef807be1d2bad35b6fbc1f54d8","schema_version":"1.0","event_id":"sha256:62772602d2d0e4b6b9f3d661de6ca4bb710bdbef807be1d2bad35b6fbc1f54d8"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/T236F6P3SZUUCUZY7S5URMHSU3/bundle.json","state_url":"https://pith.science/pith/T236F6P3SZUUCUZY7S5URMHSU3/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/T236F6P3SZUUCUZY7S5URMHSU3/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-02T22:23:53Z","links":{"resolver":"https://pith.science/pith/T236F6P3SZUUCUZY7S5URMHSU3","bundle":"https://pith.science/pith/T236F6P3SZUUCUZY7S5URMHSU3/bundle.json","state":"https://pith.science/pith/T236F6P3SZUUCUZY7S5URMHSU3/state.json","well_known_bundle":"https://pith.science/.well-known/pith/T236F6P3SZUUCUZY7S5URMHSU3/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2018:T236F6P3SZUUCUZY7S5URMHSU3","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"2a272dc31010c7494b54884387b1d4b8b1f3da1205a56fea7596570b603e7bf4","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2018-07-09T07:51:51Z","title_canon_sha256":"a220b963be9c0d3a096a7b666cb21b834bae86e19a534a40333f15e475af9dfa"},"schema_version":"1.0","source":{"id":"1807.02974","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1807.02974","created_at":"2026-05-18T00:11:13Z"},{"alias_kind":"arxiv_version","alias_value":"1807.02974v1","created_at":"2026-05-18T00:11:13Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1807.02974","created_at":"2026-05-18T00:11:13Z"},{"alias_kind":"pith_short_12","alias_value":"T236F6P3SZUU","created_at":"2026-05-18T12:32:53Z"},{"alias_kind":"pith_short_16","alias_value":"T236F6P3SZUUCUZY","created_at":"2026-05-18T12:32:53Z"},{"alias_kind":"pith_short_8","alias_value":"T236F6P3","created_at":"2026-05-18T12:32:53Z"}],"graph_snapshots":[{"event_id":"sha256:62772602d2d0e4b6b9f3d661de6ca4bb710bdbef807be1d2bad35b6fbc1f54d8","target":"graph","created_at":"2026-05-18T00:11:13Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Word segmentation is a low-level NLP task that is non-trivial for a considerable number of languages. In this paper, we present a sequence tagging framework and apply it to word segmentation for a wide range of languages with different writing systems and typological characteristics. Additionally, we investigate the correlations between various typological factors and word segmentation accuracy. The experimental results indicate that segmentation accuracy is positively related to word boundary markers and negatively to the number of unique non-segmental terms. Based on the analysis, we design ","authors_text":"Christian Hardmeier, Joakim Nivre, Yan Shao","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2018-07-09T07:51:51Z","title":"Universal Word Segmentation: Implementation and Interpretation"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1807.02974","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:8432dc5b872bf362da6b1d78fb6276f9d7a3382b0b01991f78f694de460521dc","target":"record","created_at":"2026-05-18T00:11:13Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"2a272dc31010c7494b54884387b1d4b8b1f3da1205a56fea7596570b603e7bf4","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2018-07-09T07:51:51Z","title_canon_sha256":"a220b963be9c0d3a096a7b666cb21b834bae86e19a534a40333f15e475af9dfa"},"schema_version":"1.0","source":{"id":"1807.02974","kind":"arxiv","version":1}},"canonical_sha256":"9eb7e2f9fb9669415338fcbb48b0f2a6fc5efbb02a7fb5591b2c54f9ea23a0a8","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"9eb7e2f9fb9669415338fcbb48b0f2a6fc5efbb02a7fb5591b2c54f9ea23a0a8","first_computed_at":"2026-05-18T00:11:13.163074Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:11:13.163074Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"MCSEuBp5qj8YkzO7Q+873zsU6IFGFhW4dUBbo5T4VgM4xoIzBgyAuNnaHrSEtyDjJ6JW7XKw6dySNwc/ZZzsAg==","signature_status":"signed_v1","signed_at":"2026-05-18T00:11:13.163802Z","signed_message":"canonical_sha256_bytes"},"source_id":"1807.02974","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:8432dc5b872bf362da6b1d78fb6276f9d7a3382b0b01991f78f694de460521dc","sha256:62772602d2d0e4b6b9f3d661de6ca4bb710bdbef807be1d2bad35b6fbc1f54d8"],"state_sha256":"cfdc37d11737605b022b600df6633fa10e496bfe90639a697cd0f148c43b8a43"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"0kRuWaDbC5H3Yz0lLQsCxm2MePn9fSFLP7RIKzz8HbFldsyGm5GvOz4kxwWGBp4Ry7eacUzrHuDUhpOGRsDdCA==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-02T22:23:53.127400Z","bundle_sha256":"6bb0085d29220c8cfbebf04fb5d1203eb995f36a313ca0b08fdd49ea44cf57cd"}}