{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2017:7HSQPZGW2T4GK23P7LM73TN4CQ","short_pith_number":"pith:7HSQPZGW","canonical_record":{"source":{"id":"1711.00681","kind":"arxiv","version":3},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2017-11-02T11:00:09Z","cross_cats_sorted":["cs.IR"],"title_canon_sha256":"6141b6a73d9ca982fce21d64d46085d1381cec2e968d26ffa2784615bfe86970","abstract_canon_sha256":"2ddb9db9896f8dd1e05db10c25beb3ddb128d323ca069da12eb2fd823366f67d"},"schema_version":"1.0"},"canonical_sha256":"f9e507e4d6d4f8656b6ffad9fdcdbc140915c611816953a1acd8f803e34ba75a","source":{"kind":"arxiv","id":"1711.00681","version":3},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1711.00681","created_at":"2026-05-17T23:49:53Z"},{"alias_kind":"arxiv_version","alias_value":"1711.00681v3","created_at":"2026-05-17T23:49:53Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1711.00681","created_at":"2026-05-17T23:49:53Z"},{"alias_kind":"pith_short_12","alias_value":"7HSQPZGW2T4G","created_at":"2026-05-18T12:31:05Z"},{"alias_kind":"pith_short_16","alias_value":"7HSQPZGW2T4GK23P","created_at":"2026-05-18T12:31:05Z"},{"alias_kind":"pith_short_8","alias_value":"7HSQPZGW","created_at":"2026-05-18T12:31:05Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2017:7HSQPZGW2T4GK23P7LM73TN4CQ","target":"record","payload":{"canonical_record":{"source":{"id":"1711.00681","kind":"arxiv","version":3},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2017-11-02T11:00:09Z","cross_cats_sorted":["cs.IR"],"title_canon_sha256":"6141b6a73d9ca982fce21d64d46085d1381cec2e968d26ffa2784615bfe86970","abstract_canon_sha256":"2ddb9db9896f8dd1e05db10c25beb3ddb128d323ca069da12eb2fd823366f67d"},"schema_version":"1.0"},"canonical_sha256":"f9e507e4d6d4f8656b6ffad9fdcdbc140915c611816953a1acd8f803e34ba75a","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:49:53.124371Z","signature_b64":"/kNoiKic9QDoshRnHuBBjFXpDo7Xj1beRX5DQbtZp1UfvBcO7txc9GG6I+8GJX/7VtOCzPV/1te2ySeWjKhDCQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"f9e507e4d6d4f8656b6ffad9fdcdbc140915c611816953a1acd8f803e34ba75a","last_reissued_at":"2026-05-17T23:49:53.123890Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:49:53.123890Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1711.00681","source_version":3,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:49:53Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"BSBH2KhJiEey8qenPAWc4LwST79XU2tS0ZsiOfHyCBeNcPHhSrqac8CQ20J9umXcDYQ0F8elICFe8hkKORMKDQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-30T23:56:04.079605Z"},"content_sha256":"e5374a02655cbfd3fa53d9d97b73ede2d91d284d0a6e8f8a87403b7019fa6fe7","schema_version":"1.0","event_id":"sha256:e5374a02655cbfd3fa53d9d97b73ede2d91d284d0a6e8f8a87403b7019fa6fe7"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2017:7HSQPZGW2T4GK23P7LM73TN4CQ","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Extracting an English-Persian Parallel Corpus from Comparable Corpora","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.IR"],"primary_cat":"cs.CL","authors_text":"Akbar Karimi, Bahram Sadeghi Bigham, Ebrahim Ansari","submitted_at":"2017-11-02T11:00:09Z","abstract_excerpt":"Parallel data are an important part of a reliable Statistical Machine Translation (SMT) system. The more of these data are available, the better the quality of the SMT system. However, for some language pairs such as Persian-English, parallel sources of this kind are scarce. In this paper, a bidirectional method is proposed to extract parallel sentences from English and Persian document aligned Wikipedia. Two machine translation systems are employed to translate from Persian to English and the reverse after which an IR system is used to measure the similarity of the translated sentences. Addin"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1711.00681","kind":"arxiv","version":3},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:49:53Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"uAon/iIGefFicK5MrslJaS81Dm/QRvhPe+CELAsErgW2JMlAnzTH8oI96RqF7T/2xTaWqCB50pwRl/m0/6SZBQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-30T23:56:04.080261Z"},"content_sha256":"73f840826c1a399169531f8c6b62c46a8403717753a6c9213f09870a27f5b1da","schema_version":"1.0","event_id":"sha256:73f840826c1a399169531f8c6b62c46a8403717753a6c9213f09870a27f5b1da"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/7HSQPZGW2T4GK23P7LM73TN4CQ/bundle.json","state_url":"https://pith.science/pith/7HSQPZGW2T4GK23P7LM73TN4CQ/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/7HSQPZGW2T4GK23P7LM73TN4CQ/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-30T23:56:04Z","links":{"resolver":"https://pith.science/pith/7HSQPZGW2T4GK23P7LM73TN4CQ","bundle":"https://pith.science/pith/7HSQPZGW2T4GK23P7LM73TN4CQ/bundle.json","state":"https://pith.science/pith/7HSQPZGW2T4GK23P7LM73TN4CQ/state.json","well_known_bundle":"https://pith.science/.well-known/pith/7HSQPZGW2T4GK23P7LM73TN4CQ/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2017:7HSQPZGW2T4GK23P7LM73TN4CQ","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"2ddb9db9896f8dd1e05db10c25beb3ddb128d323ca069da12eb2fd823366f67d","cross_cats_sorted":["cs.IR"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2017-11-02T11:00:09Z","title_canon_sha256":"6141b6a73d9ca982fce21d64d46085d1381cec2e968d26ffa2784615bfe86970"},"schema_version":"1.0","source":{"id":"1711.00681","kind":"arxiv","version":3}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1711.00681","created_at":"2026-05-17T23:49:53Z"},{"alias_kind":"arxiv_version","alias_value":"1711.00681v3","created_at":"2026-05-17T23:49:53Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1711.00681","created_at":"2026-05-17T23:49:53Z"},{"alias_kind":"pith_short_12","alias_value":"7HSQPZGW2T4G","created_at":"2026-05-18T12:31:05Z"},{"alias_kind":"pith_short_16","alias_value":"7HSQPZGW2T4GK23P","created_at":"2026-05-18T12:31:05Z"},{"alias_kind":"pith_short_8","alias_value":"7HSQPZGW","created_at":"2026-05-18T12:31:05Z"}],"graph_snapshots":[{"event_id":"sha256:73f840826c1a399169531f8c6b62c46a8403717753a6c9213f09870a27f5b1da","target":"graph","created_at":"2026-05-17T23:49:53Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Parallel data are an important part of a reliable Statistical Machine Translation (SMT) system. The more of these data are available, the better the quality of the SMT system. However, for some language pairs such as Persian-English, parallel sources of this kind are scarce. In this paper, a bidirectional method is proposed to extract parallel sentences from English and Persian document aligned Wikipedia. Two machine translation systems are employed to translate from Persian to English and the reverse after which an IR system is used to measure the similarity of the translated sentences. Addin","authors_text":"Akbar Karimi, Bahram Sadeghi Bigham, Ebrahim Ansari","cross_cats":["cs.IR"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2017-11-02T11:00:09Z","title":"Extracting an English-Persian Parallel Corpus from Comparable Corpora"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1711.00681","kind":"arxiv","version":3},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:e5374a02655cbfd3fa53d9d97b73ede2d91d284d0a6e8f8a87403b7019fa6fe7","target":"record","created_at":"2026-05-17T23:49:53Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"2ddb9db9896f8dd1e05db10c25beb3ddb128d323ca069da12eb2fd823366f67d","cross_cats_sorted":["cs.IR"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2017-11-02T11:00:09Z","title_canon_sha256":"6141b6a73d9ca982fce21d64d46085d1381cec2e968d26ffa2784615bfe86970"},"schema_version":"1.0","source":{"id":"1711.00681","kind":"arxiv","version":3}},"canonical_sha256":"f9e507e4d6d4f8656b6ffad9fdcdbc140915c611816953a1acd8f803e34ba75a","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"f9e507e4d6d4f8656b6ffad9fdcdbc140915c611816953a1acd8f803e34ba75a","first_computed_at":"2026-05-17T23:49:53.123890Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:49:53.123890Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"/kNoiKic9QDoshRnHuBBjFXpDo7Xj1beRX5DQbtZp1UfvBcO7txc9GG6I+8GJX/7VtOCzPV/1te2ySeWjKhDCQ==","signature_status":"signed_v1","signed_at":"2026-05-17T23:49:53.124371Z","signed_message":"canonical_sha256_bytes"},"source_id":"1711.00681","source_kind":"arxiv","source_version":3}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:e5374a02655cbfd3fa53d9d97b73ede2d91d284d0a6e8f8a87403b7019fa6fe7","sha256:73f840826c1a399169531f8c6b62c46a8403717753a6c9213f09870a27f5b1da"],"state_sha256":"4c246451953a7f6a492f4d9bce21e86f601eb0edd8602fa7fbddbbeeb1613ed8"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"3KaoxX6F6zuxxMHTkeUcXK0wYA9OQ09b1J98zYVTd2RXZ3W08uuh7WT4ki+WCrProlRnNPJZ/JhMp+AIYCDrDQ==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-30T23:56:04.083795Z","bundle_sha256":"f462c03f1482ac7d6403074c3126e8b173eeeb4f03276ab453a256c64aa9cc53"}}