{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2018:YFCMQ4FSNBRG2VHC54Z62Z22DM","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"dfca4bb3d8938844aa6385c1ad258dea900bb6d59f3232c7a0e11621609964e3","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2018-06-14T11:44:48Z","title_canon_sha256":"475fc39286403b41fb2dfd0917b6a326082d97bd4ad0893b41e93ef643c58e3c"},"schema_version":"1.0","source":{"id":"1806.05482","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1806.05482","created_at":"2026-05-18T00:13:15Z"},{"alias_kind":"arxiv_version","alias_value":"1806.05482v1","created_at":"2026-05-18T00:13:15Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1806.05482","created_at":"2026-05-18T00:13:15Z"},{"alias_kind":"pith_short_12","alias_value":"YFCMQ4FSNBRG","created_at":"2026-05-18T12:33:04Z"},{"alias_kind":"pith_short_16","alias_value":"YFCMQ4FSNBRG2VHC","created_at":"2026-05-18T12:33:04Z"},{"alias_kind":"pith_short_8","alias_value":"YFCMQ4FS","created_at":"2026-05-18T12:33:04Z"}],"graph_snapshots":[{"event_id":"sha256:e31c69a3edefe26180375f1e8a33ad73fceede60453d1c42dffa1530850cb700","target":"graph","created_at":"2026-05-18T00:13:15Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"The state of the art of handling rich morphology in neural machine translation (NMT) is to break word forms into subword units, so that the overall vocabulary size of these units fits the practical limits given by the NMT model and GPU memory capacity. In this paper, we compare two common but linguistically uninformed methods of subword construction (BPE and STE, the method implemented in Tensor2Tensor toolkit) and two linguistically-motivated methods: Morfessor and one novel method, based on a derivational dictionary. Our experiments with German-to-Czech translation, both morphologically rich","authors_text":"Dominik Mach\\'a\\v{c}ek, Jon\\'a\\v{s} Vidra, Ond\\v{r}ej Bojar","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2018-06-14T11:44:48Z","title":"Morphological and Language-Agnostic Word Segmentation for NMT"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1806.05482","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:b533dcf6b525dcae70a8f70acc5cb0fce573a8e26542dc7f6ee4b9a1115013ac","target":"record","created_at":"2026-05-18T00:13:15Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"dfca4bb3d8938844aa6385c1ad258dea900bb6d59f3232c7a0e11621609964e3","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2018-06-14T11:44:48Z","title_canon_sha256":"475fc39286403b41fb2dfd0917b6a326082d97bd4ad0893b41e93ef643c58e3c"},"schema_version":"1.0","source":{"id":"1806.05482","kind":"arxiv","version":1}},"canonical_sha256":"c144c870b268626d54e2ef33ed675a1b13ee49b99ac85314c3a9374e6a99d7d3","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"c144c870b268626d54e2ef33ed675a1b13ee49b99ac85314c3a9374e6a99d7d3","first_computed_at":"2026-05-18T00:13:15.073265Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:13:15.073265Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"rBlddJTmtHtHRQFyGX5bBdgEK0EO/JqajdeIAYGfA4nBSwFs80PbdEFtAP7wFXrN1RU5SM7HRzQe0Nte+YvWBQ==","signature_status":"signed_v1","signed_at":"2026-05-18T00:13:15.073966Z","signed_message":"canonical_sha256_bytes"},"source_id":"1806.05482","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:b533dcf6b525dcae70a8f70acc5cb0fce573a8e26542dc7f6ee4b9a1115013ac","sha256:e31c69a3edefe26180375f1e8a33ad73fceede60453d1c42dffa1530850cb700"],"state_sha256":"3568c3c0641a1a4592e5f866e7aec10045e4e02169f5052f21b63f54e6b3ee6b"}