{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2017:3VWKQGCGXPZHJGFZNKK3ALT7OJ","short_pith_number":"pith:3VWKQGCG","canonical_record":{"source":{"id":"1707.00079","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2017-07-01T01:21:22Z","cross_cats_sorted":[],"title_canon_sha256":"446bb208a8a8e8112c81623cc3dffb45a14717c9103f8811509455bab9aed921","abstract_canon_sha256":"3a4325a6ddc811614dd95c3b6532b2c54759e726b4641d29fd40117ffa0b401a"},"schema_version":"1.0"},"canonical_sha256":"dd6ca81846bbf27498b96a95b02e7f72762c657bf037242a4adbd5c1b62ce1bc","source":{"kind":"arxiv","id":"1707.00079","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1707.00079","created_at":"2026-05-18T00:29:18Z"},{"alias_kind":"arxiv_version","alias_value":"1707.00079v2","created_at":"2026-05-18T00:29:18Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1707.00079","created_at":"2026-05-18T00:29:18Z"},{"alias_kind":"pith_short_12","alias_value":"3VWKQGCGXPZH","created_at":"2026-05-18T12:30:58Z"},{"alias_kind":"pith_short_16","alias_value":"3VWKQGCGXPZHJGFZ","created_at":"2026-05-18T12:30:58Z"},{"alias_kind":"pith_short_8","alias_value":"3VWKQGCG","created_at":"2026-05-18T12:30:58Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2017:3VWKQGCGXPZHJGFZNKK3ALT7OJ","target":"record","payload":{"canonical_record":{"source":{"id":"1707.00079","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2017-07-01T01:21:22Z","cross_cats_sorted":[],"title_canon_sha256":"446bb208a8a8e8112c81623cc3dffb45a14717c9103f8811509455bab9aed921","abstract_canon_sha256":"3a4325a6ddc811614dd95c3b6532b2c54759e726b4641d29fd40117ffa0b401a"},"schema_version":"1.0"},"canonical_sha256":"dd6ca81846bbf27498b96a95b02e7f72762c657bf037242a4adbd5c1b62ce1bc","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:29:18.631860Z","signature_b64":"z9TTilSBsNVqhVVfVcqS/w6RQV8dfhe+60rv4iCv0pzFBSviMinH7nm7TX9HEBt6FldqF5UpPq8GjZ2yPupUDA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"dd6ca81846bbf27498b96a95b02e7f72762c657bf037242a4adbd5c1b62ce1bc","last_reissued_at":"2026-05-18T00:29:18.631403Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:29:18.631403Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1707.00079","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:29:18Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"37a23rd4t9zyRsXLfNadc72jdWeVJtlB0V235YkXumY1DxKp7AEUPuCrPCVJWnfrZVw7ZMZjqbKK+rkNyZ1ICg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-01T10:11:53.706850Z"},"content_sha256":"3e063c808a27b8f55773d16c7e75ca30c632e19db294739f0f32a6105f6a3ad3","schema_version":"1.0","event_id":"sha256:3e063c808a27b8f55773d16c7e75ca30c632e19db294739f0f32a6105f6a3ad3"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2017:3VWKQGCGXPZHJGFZNKK3ALT7OJ","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Synthetic Data for Neural Machine Translation of Spoken-Dialects","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Ahmed Tawfik, Hany Hassan, Mostafa Elaraby","submitted_at":"2017-07-01T01:21:22Z","abstract_excerpt":"In this paper, we introduce a novel approach to generate synthetic data for training Neural Machine Translation systems. The proposed approach transforms a given parallel corpus between a written language and a target language to a parallel corpus between a spoken dialect variant and the target language. Our approach is language independent and can be used to generate data for any variant of the source language such as slang or spoken dialect or even for a different language that is closely related to the source language.\n  The proposed approach is based on local embedding projection of distri"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1707.00079","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:29:18Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"0hEBZH7MUnVMdZ5EhHzWVmUdLJC6UwYZwH8NleCQ3ClxnOpspXZT8dkwQLbxIa6d68MZvFsgb70P5DeTFArOAg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-01T10:11:53.707210Z"},"content_sha256":"ef68f3ee4bdcba02a0c76a0d5ea0090193542ae8156e0c9db6a53a792b24b9d2","schema_version":"1.0","event_id":"sha256:ef68f3ee4bdcba02a0c76a0d5ea0090193542ae8156e0c9db6a53a792b24b9d2"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/3VWKQGCGXPZHJGFZNKK3ALT7OJ/bundle.json","state_url":"https://pith.science/pith/3VWKQGCGXPZHJGFZNKK3ALT7OJ/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/3VWKQGCGXPZHJGFZNKK3ALT7OJ/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-01T10:11:53Z","links":{"resolver":"https://pith.science/pith/3VWKQGCGXPZHJGFZNKK3ALT7OJ","bundle":"https://pith.science/pith/3VWKQGCGXPZHJGFZNKK3ALT7OJ/bundle.json","state":"https://pith.science/pith/3VWKQGCGXPZHJGFZNKK3ALT7OJ/state.json","well_known_bundle":"https://pith.science/.well-known/pith/3VWKQGCGXPZHJGFZNKK3ALT7OJ/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2017:3VWKQGCGXPZHJGFZNKK3ALT7OJ","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"3a4325a6ddc811614dd95c3b6532b2c54759e726b4641d29fd40117ffa0b401a","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2017-07-01T01:21:22Z","title_canon_sha256":"446bb208a8a8e8112c81623cc3dffb45a14717c9103f8811509455bab9aed921"},"schema_version":"1.0","source":{"id":"1707.00079","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1707.00079","created_at":"2026-05-18T00:29:18Z"},{"alias_kind":"arxiv_version","alias_value":"1707.00079v2","created_at":"2026-05-18T00:29:18Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1707.00079","created_at":"2026-05-18T00:29:18Z"},{"alias_kind":"pith_short_12","alias_value":"3VWKQGCGXPZH","created_at":"2026-05-18T12:30:58Z"},{"alias_kind":"pith_short_16","alias_value":"3VWKQGCGXPZHJGFZ","created_at":"2026-05-18T12:30:58Z"},{"alias_kind":"pith_short_8","alias_value":"3VWKQGCG","created_at":"2026-05-18T12:30:58Z"}],"graph_snapshots":[{"event_id":"sha256:ef68f3ee4bdcba02a0c76a0d5ea0090193542ae8156e0c9db6a53a792b24b9d2","target":"graph","created_at":"2026-05-18T00:29:18Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"In this paper, we introduce a novel approach to generate synthetic data for training Neural Machine Translation systems. The proposed approach transforms a given parallel corpus between a written language and a target language to a parallel corpus between a spoken dialect variant and the target language. Our approach is language independent and can be used to generate data for any variant of the source language such as slang or spoken dialect or even for a different language that is closely related to the source language.\n  The proposed approach is based on local embedding projection of distri","authors_text":"Ahmed Tawfik, Hany Hassan, Mostafa Elaraby","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2017-07-01T01:21:22Z","title":"Synthetic Data for Neural Machine Translation of Spoken-Dialects"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1707.00079","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:3e063c808a27b8f55773d16c7e75ca30c632e19db294739f0f32a6105f6a3ad3","target":"record","created_at":"2026-05-18T00:29:18Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"3a4325a6ddc811614dd95c3b6532b2c54759e726b4641d29fd40117ffa0b401a","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2017-07-01T01:21:22Z","title_canon_sha256":"446bb208a8a8e8112c81623cc3dffb45a14717c9103f8811509455bab9aed921"},"schema_version":"1.0","source":{"id":"1707.00079","kind":"arxiv","version":2}},"canonical_sha256":"dd6ca81846bbf27498b96a95b02e7f72762c657bf037242a4adbd5c1b62ce1bc","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"dd6ca81846bbf27498b96a95b02e7f72762c657bf037242a4adbd5c1b62ce1bc","first_computed_at":"2026-05-18T00:29:18.631403Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:29:18.631403Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"z9TTilSBsNVqhVVfVcqS/w6RQV8dfhe+60rv4iCv0pzFBSviMinH7nm7TX9HEBt6FldqF5UpPq8GjZ2yPupUDA==","signature_status":"signed_v1","signed_at":"2026-05-18T00:29:18.631860Z","signed_message":"canonical_sha256_bytes"},"source_id":"1707.00079","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:3e063c808a27b8f55773d16c7e75ca30c632e19db294739f0f32a6105f6a3ad3","sha256:ef68f3ee4bdcba02a0c76a0d5ea0090193542ae8156e0c9db6a53a792b24b9d2"],"state_sha256":"3df3b0d2b776d010cb3a423b959191bd8c47cd5fa0157b337bf2a22fec8776d4"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"VDjrF6BiBmTeSMfKyZqMPHmWT+2vGvMc7Ld6TzmUwBrGvg3icqNTQh6UmLJLPLtdVsPprqNKdcZBhxDJ/xopCg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-01T10:11:53.709287Z","bundle_sha256":"3e600e64bfca5be33daafe010d74675b5364e0ceeeff151b41709f87dfe8c3b9"}}