{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2018:LX3T2SDCEZMEF76I6TYIFOMHUJ","short_pith_number":"pith:LX3T2SDC","canonical_record":{"source":{"id":"1812.00815","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2018-12-03T15:04:23Z","cross_cats_sorted":[],"title_canon_sha256":"324e8bd6b2347f824c2f92b3693805c8272f3285b5aec64539c329373bb0e67d","abstract_canon_sha256":"5d76836042656d4ca82f1551dce1bd71fed72546b466ca6593bf8ee5b984e06f"},"schema_version":"1.0"},"canonical_sha256":"5df73d4862265842ffc8f4f082b987a279f39c93ffa31a932eb1f02f6ddcc6e5","source":{"kind":"arxiv","id":"1812.00815","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1812.00815","created_at":"2026-05-17T23:59:18Z"},{"alias_kind":"arxiv_version","alias_value":"1812.00815v1","created_at":"2026-05-17T23:59:18Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1812.00815","created_at":"2026-05-17T23:59:18Z"},{"alias_kind":"pith_short_12","alias_value":"LX3T2SDCEZME","created_at":"2026-05-18T12:32:37Z"},{"alias_kind":"pith_short_16","alias_value":"LX3T2SDCEZMEF76I","created_at":"2026-05-18T12:32:37Z"},{"alias_kind":"pith_short_8","alias_value":"LX3T2SDC","created_at":"2026-05-18T12:32:37Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2018:LX3T2SDCEZMEF76I6TYIFOMHUJ","target":"record","payload":{"canonical_record":{"source":{"id":"1812.00815","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2018-12-03T15:04:23Z","cross_cats_sorted":[],"title_canon_sha256":"324e8bd6b2347f824c2f92b3693805c8272f3285b5aec64539c329373bb0e67d","abstract_canon_sha256":"5d76836042656d4ca82f1551dce1bd71fed72546b466ca6593bf8ee5b984e06f"},"schema_version":"1.0"},"canonical_sha256":"5df73d4862265842ffc8f4f082b987a279f39c93ffa31a932eb1f02f6ddcc6e5","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:59:18.917798Z","signature_b64":"OlKk775MkQuwy95enuRCI2XJ6cqdbob+x8qArgKE8kyJuvLOvWXNVASJGrr+kT7paWc/0UWaMyClDYzcT4ZZBA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"5df73d4862265842ffc8f4f082b987a279f39c93ffa31a932eb1f02f6ddcc6e5","last_reissued_at":"2026-05-17T23:59:18.917289Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:59:18.917289Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1812.00815","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:59:18Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"EdlwMhAV7a1ltLYaJ/B8wcSiwpM2gKcKwkQL/QI+W9KHwlEBj5Omao9htJyzJi/ISBp22/SS0SHsxhkc0YW3Bg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-04T13:06:53.865278Z"},"content_sha256":"6b5984c087419be9b76a9b3276ad51a01b70156c6599ba939ca99eccc85e1523","schema_version":"1.0","event_id":"sha256:6b5984c087419be9b76a9b3276ad51a01b70156c6599ba939ca99eccc85e1523"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2018:LX3T2SDCEZMEF76I6TYIFOMHUJ","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Comparing Neural- and N-Gram-Based Language Models for Word Segmentation","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Carlos G\\'omez-Rodr\\'iguez, Yerai Doval","submitted_at":"2018-12-03T15:04:23Z","abstract_excerpt":"Word segmentation is the task of inserting or deleting word boundary characters in order to separate character sequences that correspond to words in some language. In this article we propose an approach based on a beam search algorithm and a language model working at the byte/character level, the latter component implemented either as an n-gram model or a recurrent neural network. The resulting system analyzes the text input with no word boundaries one token at a time, which can be a character or a byte, and uses the information gathered by the language model to determine if a boundary must be"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1812.00815","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:59:18Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"bs1W5uMSxkCG0mnb2qbVkzCxhpZPfDCYrNEHe4aaxWgtHpmEadTyfhZIWD7YxlPdC1LkVdueIJhs/OI89XHdBQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-04T13:06:53.865634Z"},"content_sha256":"222d67c1311905466e25a3fd5a48218f87efa0ad105ebfc404a2b0b5d8ab2af5","schema_version":"1.0","event_id":"sha256:222d67c1311905466e25a3fd5a48218f87efa0ad105ebfc404a2b0b5d8ab2af5"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/LX3T2SDCEZMEF76I6TYIFOMHUJ/bundle.json","state_url":"https://pith.science/pith/LX3T2SDCEZMEF76I6TYIFOMHUJ/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/LX3T2SDCEZMEF76I6TYIFOMHUJ/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-04T13:06:53Z","links":{"resolver":"https://pith.science/pith/LX3T2SDCEZMEF76I6TYIFOMHUJ","bundle":"https://pith.science/pith/LX3T2SDCEZMEF76I6TYIFOMHUJ/bundle.json","state":"https://pith.science/pith/LX3T2SDCEZMEF76I6TYIFOMHUJ/state.json","well_known_bundle":"https://pith.science/.well-known/pith/LX3T2SDCEZMEF76I6TYIFOMHUJ/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2018:LX3T2SDCEZMEF76I6TYIFOMHUJ","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"5d76836042656d4ca82f1551dce1bd71fed72546b466ca6593bf8ee5b984e06f","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2018-12-03T15:04:23Z","title_canon_sha256":"324e8bd6b2347f824c2f92b3693805c8272f3285b5aec64539c329373bb0e67d"},"schema_version":"1.0","source":{"id":"1812.00815","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1812.00815","created_at":"2026-05-17T23:59:18Z"},{"alias_kind":"arxiv_version","alias_value":"1812.00815v1","created_at":"2026-05-17T23:59:18Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1812.00815","created_at":"2026-05-17T23:59:18Z"},{"alias_kind":"pith_short_12","alias_value":"LX3T2SDCEZME","created_at":"2026-05-18T12:32:37Z"},{"alias_kind":"pith_short_16","alias_value":"LX3T2SDCEZMEF76I","created_at":"2026-05-18T12:32:37Z"},{"alias_kind":"pith_short_8","alias_value":"LX3T2SDC","created_at":"2026-05-18T12:32:37Z"}],"graph_snapshots":[{"event_id":"sha256:222d67c1311905466e25a3fd5a48218f87efa0ad105ebfc404a2b0b5d8ab2af5","target":"graph","created_at":"2026-05-17T23:59:18Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Word segmentation is the task of inserting or deleting word boundary characters in order to separate character sequences that correspond to words in some language. In this article we propose an approach based on a beam search algorithm and a language model working at the byte/character level, the latter component implemented either as an n-gram model or a recurrent neural network. The resulting system analyzes the text input with no word boundaries one token at a time, which can be a character or a byte, and uses the information gathered by the language model to determine if a boundary must be","authors_text":"Carlos G\\'omez-Rodr\\'iguez, Yerai Doval","cross_cats":[],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2018-12-03T15:04:23Z","title":"Comparing Neural- and N-Gram-Based Language Models for Word Segmentation"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1812.00815","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:6b5984c087419be9b76a9b3276ad51a01b70156c6599ba939ca99eccc85e1523","target":"record","created_at":"2026-05-17T23:59:18Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"5d76836042656d4ca82f1551dce1bd71fed72546b466ca6593bf8ee5b984e06f","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2018-12-03T15:04:23Z","title_canon_sha256":"324e8bd6b2347f824c2f92b3693805c8272f3285b5aec64539c329373bb0e67d"},"schema_version":"1.0","source":{"id":"1812.00815","kind":"arxiv","version":1}},"canonical_sha256":"5df73d4862265842ffc8f4f082b987a279f39c93ffa31a932eb1f02f6ddcc6e5","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"5df73d4862265842ffc8f4f082b987a279f39c93ffa31a932eb1f02f6ddcc6e5","first_computed_at":"2026-05-17T23:59:18.917289Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:59:18.917289Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"OlKk775MkQuwy95enuRCI2XJ6cqdbob+x8qArgKE8kyJuvLOvWXNVASJGrr+kT7paWc/0UWaMyClDYzcT4ZZBA==","signature_status":"signed_v1","signed_at":"2026-05-17T23:59:18.917798Z","signed_message":"canonical_sha256_bytes"},"source_id":"1812.00815","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:6b5984c087419be9b76a9b3276ad51a01b70156c6599ba939ca99eccc85e1523","sha256:222d67c1311905466e25a3fd5a48218f87efa0ad105ebfc404a2b0b5d8ab2af5"],"state_sha256":"3fe78d5780e3e2b3ef7fbdb9a13ec4ae24367abaa88fb50f139b5ef532dd2673"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"aPQeExUKgxK8xpG/gzvrvyDk1qhB0B+yQTQgiobQtCYl5pvxqxku34b58hkTmKC0m7/A9dcKTNmTjkYq5ecCCg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-04T13:06:53.867502Z","bundle_sha256":"1cf6c4d1e1531db7c5f4f23873a5c46e31260ec377a0cea29670ad20ff681532"}}