{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2017:GAO7GTNZPTXMZ7A2NJF2YAPMMF","short_pith_number":"pith:GAO7GTNZ","canonical_record":{"source":{"id":"1707.09879","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2017-07-31T14:31:01Z","cross_cats_sorted":[],"title_canon_sha256":"e684fb590de37bea1fd7686c9346ec6976d0c587a73d4a7fa58afa85f4b6c7c7","abstract_canon_sha256":"9793f2697f20ec6df7599abc4b0c7bbc753059109ff55a16da43c87d78e486f5"},"schema_version":"1.0"},"canonical_sha256":"301df34db97ceeccfc1a6a4bac01ec615d981a533690298408ed1ce6fdf1b0b9","source":{"kind":"arxiv","id":"1707.09879","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1707.09879","created_at":"2026-05-18T00:39:07Z"},{"alias_kind":"arxiv_version","alias_value":"1707.09879v1","created_at":"2026-05-18T00:39:07Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1707.09879","created_at":"2026-05-18T00:39:07Z"},{"alias_kind":"pith_short_12","alias_value":"GAO7GTNZPTXM","created_at":"2026-05-18T12:31:15Z"},{"alias_kind":"pith_short_16","alias_value":"GAO7GTNZPTXMZ7A2","created_at":"2026-05-18T12:31:15Z"},{"alias_kind":"pith_short_8","alias_value":"GAO7GTNZ","created_at":"2026-05-18T12:31:15Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2017:GAO7GTNZPTXMZ7A2NJF2YAPMMF","target":"record","payload":{"canonical_record":{"source":{"id":"1707.09879","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2017-07-31T14:31:01Z","cross_cats_sorted":[],"title_canon_sha256":"e684fb590de37bea1fd7686c9346ec6976d0c587a73d4a7fa58afa85f4b6c7c7","abstract_canon_sha256":"9793f2697f20ec6df7599abc4b0c7bbc753059109ff55a16da43c87d78e486f5"},"schema_version":"1.0"},"canonical_sha256":"301df34db97ceeccfc1a6a4bac01ec615d981a533690298408ed1ce6fdf1b0b9","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:39:07.907047Z","signature_b64":"5DcUkmXI/0eFs8MH7DtRjK88C356+s+p/4ZkohukldanFc6f7x26kcVxxpjpEuW6ELvglNl84lMeGSDBj/LnDw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"301df34db97ceeccfc1a6a4bac01ec615d981a533690298408ed1ce6fdf1b0b9","last_reissued_at":"2026-05-18T00:39:07.906263Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:39:07.906263Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1707.09879","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:39:07Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"BhYLeHGO8jgSUxSjisPCbUXgO9Nrzpc0puz2LZYQtMWt3PGkfLgGZrjSUseFELfjXYI1r+QLtIdiq8rieiXPBg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-12T09:46:19.640930Z"},"content_sha256":"24d9b16fd73f15fff42cc6e327402c24cfcb7d739212d71b5b789cd70a2454b6","schema_version":"1.0","event_id":"sha256:24d9b16fd73f15fff42cc6e327402c24cfcb7d739212d71b5b789cd70a2454b6"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2017:GAO7GTNZPTXMZ7A2NJF2YAPMMF","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Linguistically Motivated Vocabulary Reduction for Neural Machine Translation from Turkish to English","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Duygu Ataman, Marcello Federico, Marco Turchi, Matteo Negri","submitted_at":"2017-07-31T14:31:01Z","abstract_excerpt":"The necessity of using a fixed-size word vocabulary in order to control the model complexity in state-of-the-art neural machine translation (NMT) systems is an important bottleneck on performance, especially for morphologically rich languages. Conventional methods that aim to overcome this problem by using sub-word or character-level representations solely rely on statistics and disregard the linguistic properties of words, which leads to interruptions in the word structure and causes semantic and syntactic losses. In this paper, we propose a new vocabulary reduction method for NMT, which can "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1707.09879","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:39:07Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"fBnB5mfL+WDo4X90zccl+/9zEsMObkVQeQW9NNpR+9UjBP+HfPqCTc3Qd4SmfN47uFaWy0gG1ZEbvKiQ2bKYBg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-12T09:46:19.641589Z"},"content_sha256":"f7fa85f6d60cb8647ef976c8eb5d381b789d3e850014bb40fe2af99d3df5e833","schema_version":"1.0","event_id":"sha256:f7fa85f6d60cb8647ef976c8eb5d381b789d3e850014bb40fe2af99d3df5e833"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/GAO7GTNZPTXMZ7A2NJF2YAPMMF/bundle.json","state_url":"https://pith.science/pith/GAO7GTNZPTXMZ7A2NJF2YAPMMF/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/GAO7GTNZPTXMZ7A2NJF2YAPMMF/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-12T09:46:19Z","links":{"resolver":"https://pith.science/pith/GAO7GTNZPTXMZ7A2NJF2YAPMMF","bundle":"https://pith.science/pith/GAO7GTNZPTXMZ7A2NJF2YAPMMF/bundle.json","state":"https://pith.science/pith/GAO7GTNZPTXMZ7A2NJF2YAPMMF/state.json","well_known_bundle":"https://pith.science/.well-known/pith/GAO7GTNZPTXMZ7A2NJF2YAPMMF/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2017:GAO7GTNZPTXMZ7A2NJF2YAPMMF","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"9793f2697f20ec6df7599abc4b0c7bbc753059109ff55a16da43c87d78e486f5","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2017-07-31T14:31:01Z","title_canon_sha256":"e684fb590de37bea1fd7686c9346ec6976d0c587a73d4a7fa58afa85f4b6c7c7"},"schema_version":"1.0","source":{"id":"1707.09879","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1707.09879","created_at":"2026-05-18T00:39:07Z"},{"alias_kind":"arxiv_version","alias_value":"1707.09879v1","created_at":"2026-05-18T00:39:07Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1707.09879","created_at":"2026-05-18T00:39:07Z"},{"alias_kind":"pith_short_12","alias_value":"GAO7GTNZPTXM","created_at":"2026-05-18T12:31:15Z"},{"alias_kind":"pith_short_16","alias_value":"GAO7GTNZPTXMZ7A2","created_at":"2026-05-18T12:31:15Z"},{"alias_kind":"pith_short_8","alias_value":"GAO7GTNZ","created_at":"2026-05-18T12:31:15Z"}],"graph_snapshots":[{"event_id":"sha256:f7fa85f6d60cb8647ef976c8eb5d381b789d3e850014bb40fe2af99d3df5e833","target":"graph","created_at":"2026-05-18T00:39:07Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"The necessity of using a fixed-size word vocabulary in order to control the model complexity in state-of-the-art neural machine translation (NMT) systems is an important bottleneck on performance, especially for morphologically rich languages. Conventional methods that aim to overcome this problem by using sub-word or character-level representations solely rely on statistics and disregard the linguistic properties of words, which leads to interruptions in the word structure and causes semantic and syntactic losses. In this paper, we propose a new vocabulary reduction method for NMT, which can ","authors_text":"Duygu Ataman, Marcello Federico, Marco Turchi, Matteo Negri","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2017-07-31T14:31:01Z","title":"Linguistically Motivated Vocabulary Reduction for Neural Machine Translation from Turkish to English"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1707.09879","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:24d9b16fd73f15fff42cc6e327402c24cfcb7d739212d71b5b789cd70a2454b6","target":"record","created_at":"2026-05-18T00:39:07Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"9793f2697f20ec6df7599abc4b0c7bbc753059109ff55a16da43c87d78e486f5","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2017-07-31T14:31:01Z","title_canon_sha256":"e684fb590de37bea1fd7686c9346ec6976d0c587a73d4a7fa58afa85f4b6c7c7"},"schema_version":"1.0","source":{"id":"1707.09879","kind":"arxiv","version":1}},"canonical_sha256":"301df34db97ceeccfc1a6a4bac01ec615d981a533690298408ed1ce6fdf1b0b9","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"301df34db97ceeccfc1a6a4bac01ec615d981a533690298408ed1ce6fdf1b0b9","first_computed_at":"2026-05-18T00:39:07.906263Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:39:07.906263Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"5DcUkmXI/0eFs8MH7DtRjK88C356+s+p/4ZkohukldanFc6f7x26kcVxxpjpEuW6ELvglNl84lMeGSDBj/LnDw==","signature_status":"signed_v1","signed_at":"2026-05-18T00:39:07.907047Z","signed_message":"canonical_sha256_bytes"},"source_id":"1707.09879","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:24d9b16fd73f15fff42cc6e327402c24cfcb7d739212d71b5b789cd70a2454b6","sha256:f7fa85f6d60cb8647ef976c8eb5d381b789d3e850014bb40fe2af99d3df5e833"],"state_sha256":"3c35ecde9c3ac94e8dbae9f2a992b5b08d0de96191db1a29c00c85c410fd9a10"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"1LFWbHusLhmEP3IHvBGj8AovLFOgyuvB/uddfXEf4192MRs2js2c3AMIMyJNpW838L0dODZhNx3Vyyas6o84Bg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-12T09:46:19.644591Z","bundle_sha256":"67db836ad95910d0e1ff55896a46b0ab4dbe75611b772ed2baacd115017d9b88"}}