{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2017:7HSQPZGW2T4GK23P7LM73TN4CQ","short_pith_number":"pith:7HSQPZGW","schema_version":"1.0","canonical_sha256":"f9e507e4d6d4f8656b6ffad9fdcdbc140915c611816953a1acd8f803e34ba75a","source":{"kind":"arxiv","id":"1711.00681","version":3},"attestation_state":"computed","paper":{"title":"Extracting an English-Persian Parallel Corpus from Comparable Corpora","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.IR"],"primary_cat":"cs.CL","authors_text":"Akbar Karimi, Bahram Sadeghi Bigham, Ebrahim Ansari","submitted_at":"2017-11-02T11:00:09Z","abstract_excerpt":"Parallel data are an important part of a reliable Statistical Machine Translation (SMT) system. The more of these data are available, the better the quality of the SMT system. However, for some language pairs such as Persian-English, parallel sources of this kind are scarce. In this paper, a bidirectional method is proposed to extract parallel sentences from English and Persian document aligned Wikipedia. Two machine translation systems are employed to translate from Persian to English and the reverse after which an IR system is used to measure the similarity of the translated sentences. Addin"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1711.00681","kind":"arxiv","version":3},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2017-11-02T11:00:09Z","cross_cats_sorted":["cs.IR"],"title_canon_sha256":"6141b6a73d9ca982fce21d64d46085d1381cec2e968d26ffa2784615bfe86970","abstract_canon_sha256":"2ddb9db9896f8dd1e05db10c25beb3ddb128d323ca069da12eb2fd823366f67d"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:49:53.124371Z","signature_b64":"/kNoiKic9QDoshRnHuBBjFXpDo7Xj1beRX5DQbtZp1UfvBcO7txc9GG6I+8GJX/7VtOCzPV/1te2ySeWjKhDCQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"f9e507e4d6d4f8656b6ffad9fdcdbc140915c611816953a1acd8f803e34ba75a","last_reissued_at":"2026-05-17T23:49:53.123890Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:49:53.123890Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Extracting an English-Persian Parallel Corpus from Comparable Corpora","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.IR"],"primary_cat":"cs.CL","authors_text":"Akbar Karimi, Bahram Sadeghi Bigham, Ebrahim Ansari","submitted_at":"2017-11-02T11:00:09Z","abstract_excerpt":"Parallel data are an important part of a reliable Statistical Machine Translation (SMT) system. The more of these data are available, the better the quality of the SMT system. However, for some language pairs such as Persian-English, parallel sources of this kind are scarce. In this paper, a bidirectional method is proposed to extract parallel sentences from English and Persian document aligned Wikipedia. Two machine translation systems are employed to translate from Persian to English and the reverse after which an IR system is used to measure the similarity of the translated sentences. Addin"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1711.00681","kind":"arxiv","version":3},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1711.00681","created_at":"2026-05-17T23:49:53.123963+00:00"},{"alias_kind":"arxiv_version","alias_value":"1711.00681v3","created_at":"2026-05-17T23:49:53.123963+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1711.00681","created_at":"2026-05-17T23:49:53.123963+00:00"},{"alias_kind":"pith_short_12","alias_value":"7HSQPZGW2T4G","created_at":"2026-05-18T12:31:05.417338+00:00"},{"alias_kind":"pith_short_16","alias_value":"7HSQPZGW2T4GK23P","created_at":"2026-05-18T12:31:05.417338+00:00"},{"alias_kind":"pith_short_8","alias_value":"7HSQPZGW","created_at":"2026-05-18T12:31:05.417338+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/7HSQPZGW2T4GK23P7LM73TN4CQ","json":"https://pith.science/pith/7HSQPZGW2T4GK23P7LM73TN4CQ.json","graph_json":"https://pith.science/api/pith-number/7HSQPZGW2T4GK23P7LM73TN4CQ/graph.json","events_json":"https://pith.science/api/pith-number/7HSQPZGW2T4GK23P7LM73TN4CQ/events.json","paper":"https://pith.science/paper/7HSQPZGW"},"agent_actions":{"view_html":"https://pith.science/pith/7HSQPZGW2T4GK23P7LM73TN4CQ","download_json":"https://pith.science/pith/7HSQPZGW2T4GK23P7LM73TN4CQ.json","view_paper":"https://pith.science/paper/7HSQPZGW","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1711.00681&json=true","fetch_graph":"https://pith.science/api/pith-number/7HSQPZGW2T4GK23P7LM73TN4CQ/graph.json","fetch_events":"https://pith.science/api/pith-number/7HSQPZGW2T4GK23P7LM73TN4CQ/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/7HSQPZGW2T4GK23P7LM73TN4CQ/action/timestamp_anchor","attest_storage":"https://pith.science/pith/7HSQPZGW2T4GK23P7LM73TN4CQ/action/storage_attestation","attest_author":"https://pith.science/pith/7HSQPZGW2T4GK23P7LM73TN4CQ/action/author_attestation","sign_citation":"https://pith.science/pith/7HSQPZGW2T4GK23P7LM73TN4CQ/action/citation_signature","submit_replication":"https://pith.science/pith/7HSQPZGW2T4GK23P7LM73TN4CQ/action/replication_record"}},"created_at":"2026-05-17T23:49:53.123963+00:00","updated_at":"2026-05-17T23:49:53.123963+00:00"}