{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2018:FKYNFVWWXMXAKB2DV2RUFAON4A","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"4e529d3742d27491527ba252856225a624e3d0b6ca2322e9fd75ef8470a0f86c","cross_cats_sorted":["stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2018-10-02T19:52:38Z","title_canon_sha256":"692a4df807f0be9c25aa46ee519ac85cf4cf21ce5efa8796dae2f79ffc630f97"},"schema_version":"1.0","source":{"id":"1810.01480","kind":"arxiv","version":3}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1810.01480","created_at":"2026-05-18T00:01:37Z"},{"alias_kind":"arxiv_version","alias_value":"1810.01480v3","created_at":"2026-05-18T00:01:37Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1810.01480","created_at":"2026-05-18T00:01:37Z"},{"alias_kind":"pith_short_12","alias_value":"FKYNFVWWXMXA","created_at":"2026-05-18T12:32:22Z"},{"alias_kind":"pith_short_16","alias_value":"FKYNFVWWXMXAKB2D","created_at":"2026-05-18T12:32:22Z"},{"alias_kind":"pith_short_8","alias_value":"FKYNFVWW","created_at":"2026-05-18T12:32:22Z"}],"graph_snapshots":[{"event_id":"sha256:22788eddc6583a0adc551ae8a2ba2b0e0f03798cf956f0565d4c355c050f9d2b","target":"graph","created_at":"2026-05-18T00:01:37Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Most modern neural machine translation (NMT) systems rely on presegmented inputs. Segmentation granularity importantly determines the input and output sequence lengths, hence the modeling depth, and source and target vocabularies, which in turn determine model size, computational costs of softmax normalization, and handling of out-of-vocabulary words. However, the current practice is to use static, heuristic-based segmentations that are fixed before NMT training. This begs the question whether the chosen segmentation is optimal for the translation task. To overcome suboptimal segmentation choi","authors_text":"Artem Sokolov, Julia Kreutzer","cross_cats":["stat.ML"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2018-10-02T19:52:38Z","title":"Learning to Segment Inputs for NMT Favors Character-Level Processing"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1810.01480","kind":"arxiv","version":3},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:ec3f76662f3de028f824093a55a2b6965c8eb40ebc222605a7ee85e7c3bb6e3f","target":"record","created_at":"2026-05-18T00:01:37Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"4e529d3742d27491527ba252856225a624e3d0b6ca2322e9fd75ef8470a0f86c","cross_cats_sorted":["stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2018-10-02T19:52:38Z","title_canon_sha256":"692a4df807f0be9c25aa46ee519ac85cf4cf21ce5efa8796dae2f79ffc630f97"},"schema_version":"1.0","source":{"id":"1810.01480","kind":"arxiv","version":3}},"canonical_sha256":"2ab0d2d6d6bb2e050743aea34281cde0241f11240554e7795b70cc5f5de04e15","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"2ab0d2d6d6bb2e050743aea34281cde0241f11240554e7795b70cc5f5de04e15","first_computed_at":"2026-05-18T00:01:37.660724Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:01:37.660724Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"qjOAFRUQr9V4GNBQmuGpq6c5Ig0mOg+Z/h2y4USy/UG1AsB/OKT6k807kZzbDSSytiFoLyrIFxlIVmLmZg34BQ==","signature_status":"signed_v1","signed_at":"2026-05-18T00:01:37.661184Z","signed_message":"canonical_sha256_bytes"},"source_id":"1810.01480","source_kind":"arxiv","source_version":3}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:ec3f76662f3de028f824093a55a2b6965c8eb40ebc222605a7ee85e7c3bb6e3f","sha256:22788eddc6583a0adc551ae8a2ba2b0e0f03798cf956f0565d4c355c050f9d2b"],"state_sha256":"3aa6f7a4c5fba428683befd2052bed986b09c6ac87abfd41292e9c8143bba87e"}