{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2019:U77OXXT7EOISGA4TXCT5RV4XV7","short_pith_number":"pith:U77OXXT7","canonical_record":{"source":{"id":"1907.05791","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2019-07-10T23:57:30Z","cross_cats_sorted":[],"title_canon_sha256":"2417263109f293d31a702bcddca0c143d633e3db74e73c442e8bd9fd9bc3c208","abstract_canon_sha256":"bcf9c12dc0728931019521e7365176f23ba6d37dd69ff93b8256bdc0770f5c1d"},"schema_version":"1.0"},"canonical_sha256":"a7feebde7f2391230393b8a7d8d797afea150d456ca1a9696d2589ed53a8e096","source":{"kind":"arxiv","id":"1907.05791","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1907.05791","created_at":"2026-05-17T23:40:29Z"},{"alias_kind":"arxiv_version","alias_value":"1907.05791v2","created_at":"2026-05-17T23:40:29Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1907.05791","created_at":"2026-05-17T23:40:29Z"},{"alias_kind":"pith_short_12","alias_value":"U77OXXT7EOIS","created_at":"2026-05-18T12:33:30Z"},{"alias_kind":"pith_short_16","alias_value":"U77OXXT7EOISGA4T","created_at":"2026-05-18T12:33:30Z"},{"alias_kind":"pith_short_8","alias_value":"U77OXXT7","created_at":"2026-05-18T12:33:30Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2019:U77OXXT7EOISGA4TXCT5RV4XV7","target":"record","payload":{"canonical_record":{"source":{"id":"1907.05791","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2019-07-10T23:57:30Z","cross_cats_sorted":[],"title_canon_sha256":"2417263109f293d31a702bcddca0c143d633e3db74e73c442e8bd9fd9bc3c208","abstract_canon_sha256":"bcf9c12dc0728931019521e7365176f23ba6d37dd69ff93b8256bdc0770f5c1d"},"schema_version":"1.0"},"canonical_sha256":"a7feebde7f2391230393b8a7d8d797afea150d456ca1a9696d2589ed53a8e096","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:40:29.539028Z","signature_b64":"ATqR0RqOzKFZScCxXGSZdkXcbegEUrbSPWTnGKKgO1M06NRFZAyBG18sAMmIYXMLSRbv2eKpfnLE0AQzDzRlDg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"a7feebde7f2391230393b8a7d8d797afea150d456ca1a9696d2589ed53a8e096","last_reissued_at":"2026-05-17T23:40:29.538369Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:40:29.538369Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1907.05791","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:40:29Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"an0R6Z55USKQfNLls4UTtYnPMacjwXMXrbRiUjzVR/uQBAuLfwKb5S2Q3ZcepjDkbS1CloTf7aYvuHEZbWxoDQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-26T10:07:52.405332Z"},"content_sha256":"9b3671cf72bdeb7fa0dce21f1231b0371f3ad9d015b6c92ade983a103e918fca","schema_version":"1.0","event_id":"sha256:9b3671cf72bdeb7fa0dce21f1231b0371f3ad9d015b6c92ade983a103e918fca"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2019:U77OXXT7EOISGA4TXCT5RV4XV7","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"WikiMatrix: Mining 135M Parallel Sentences in 1620 Language Pairs from Wikipedia","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Francisco Guzm\\'an, Holger Schwenk, Hongyu Gong, Shuo Sun, Vishrav Chaudhary","submitted_at":"2019-07-10T23:57:30Z","abstract_excerpt":"We present an approach based on multilingual sentence embeddings to automatically extract parallel sentences from the content of Wikipedia articles in 85 languages, including several dialects or low-resource languages. We do not limit the the extraction process to alignments with English, but systematically consider all possible language pairs. In total, we are able to extract 135M parallel sentences for 1620 different language pairs, out of which only 34M are aligned with English. This corpus of parallel sentences is freely available at https://github.com/facebookresearch/LASER/tree/master/ta"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1907.05791","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:40:29Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"BGWJCmE9jILxLoi5flnbHnW3/HmBGSxGsnYdxHFx2SjSkDUdqdePDN9+dfbAst8SDoKf/KfaM52fquY2Yn0vBA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-26T10:07:52.406070Z"},"content_sha256":"71c7f114bbc06dbd5016f0841ad20563be7a86cae84234ba55fd605104289238","schema_version":"1.0","event_id":"sha256:71c7f114bbc06dbd5016f0841ad20563be7a86cae84234ba55fd605104289238"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/U77OXXT7EOISGA4TXCT5RV4XV7/bundle.json","state_url":"https://pith.science/pith/U77OXXT7EOISGA4TXCT5RV4XV7/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/U77OXXT7EOISGA4TXCT5RV4XV7/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-26T10:07:52Z","links":{"resolver":"https://pith.science/pith/U77OXXT7EOISGA4TXCT5RV4XV7","bundle":"https://pith.science/pith/U77OXXT7EOISGA4TXCT5RV4XV7/bundle.json","state":"https://pith.science/pith/U77OXXT7EOISGA4TXCT5RV4XV7/state.json","well_known_bundle":"https://pith.science/.well-known/pith/U77OXXT7EOISGA4TXCT5RV4XV7/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2019:U77OXXT7EOISGA4TXCT5RV4XV7","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"bcf9c12dc0728931019521e7365176f23ba6d37dd69ff93b8256bdc0770f5c1d","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2019-07-10T23:57:30Z","title_canon_sha256":"2417263109f293d31a702bcddca0c143d633e3db74e73c442e8bd9fd9bc3c208"},"schema_version":"1.0","source":{"id":"1907.05791","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1907.05791","created_at":"2026-05-17T23:40:29Z"},{"alias_kind":"arxiv_version","alias_value":"1907.05791v2","created_at":"2026-05-17T23:40:29Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1907.05791","created_at":"2026-05-17T23:40:29Z"},{"alias_kind":"pith_short_12","alias_value":"U77OXXT7EOIS","created_at":"2026-05-18T12:33:30Z"},{"alias_kind":"pith_short_16","alias_value":"U77OXXT7EOISGA4T","created_at":"2026-05-18T12:33:30Z"},{"alias_kind":"pith_short_8","alias_value":"U77OXXT7","created_at":"2026-05-18T12:33:30Z"}],"graph_snapshots":[{"event_id":"sha256:71c7f114bbc06dbd5016f0841ad20563be7a86cae84234ba55fd605104289238","target":"graph","created_at":"2026-05-17T23:40:29Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"We present an approach based on multilingual sentence embeddings to automatically extract parallel sentences from the content of Wikipedia articles in 85 languages, including several dialects or low-resource languages. We do not limit the the extraction process to alignments with English, but systematically consider all possible language pairs. In total, we are able to extract 135M parallel sentences for 1620 different language pairs, out of which only 34M are aligned with English. This corpus of parallel sentences is freely available at https://github.com/facebookresearch/LASER/tree/master/ta","authors_text":"Francisco Guzm\\'an, Holger Schwenk, Hongyu Gong, Shuo Sun, Vishrav Chaudhary","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2019-07-10T23:57:30Z","title":"WikiMatrix: Mining 135M Parallel Sentences in 1620 Language Pairs from Wikipedia"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1907.05791","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:9b3671cf72bdeb7fa0dce21f1231b0371f3ad9d015b6c92ade983a103e918fca","target":"record","created_at":"2026-05-17T23:40:29Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"bcf9c12dc0728931019521e7365176f23ba6d37dd69ff93b8256bdc0770f5c1d","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2019-07-10T23:57:30Z","title_canon_sha256":"2417263109f293d31a702bcddca0c143d633e3db74e73c442e8bd9fd9bc3c208"},"schema_version":"1.0","source":{"id":"1907.05791","kind":"arxiv","version":2}},"canonical_sha256":"a7feebde7f2391230393b8a7d8d797afea150d456ca1a9696d2589ed53a8e096","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"a7feebde7f2391230393b8a7d8d797afea150d456ca1a9696d2589ed53a8e096","first_computed_at":"2026-05-17T23:40:29.538369Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:40:29.538369Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"ATqR0RqOzKFZScCxXGSZdkXcbegEUrbSPWTnGKKgO1M06NRFZAyBG18sAMmIYXMLSRbv2eKpfnLE0AQzDzRlDg==","signature_status":"signed_v1","signed_at":"2026-05-17T23:40:29.539028Z","signed_message":"canonical_sha256_bytes"},"source_id":"1907.05791","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:9b3671cf72bdeb7fa0dce21f1231b0371f3ad9d015b6c92ade983a103e918fca","sha256:71c7f114bbc06dbd5016f0841ad20563be7a86cae84234ba55fd605104289238"],"state_sha256":"04ec1df44b47ac8c97b387902d3484ca9427cac6f08974175aee69096b907276"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"ma8qsOurtLNNZwLvsTiZM97V699uI2Amx78p329t6YZQ6SMamJMLwwIcVW+V++Rhgcm/KTv9XyWEngJfL4IuCg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-26T10:07:52.409782Z","bundle_sha256":"3bf20ae41dcb0c20e66b1471c0129670254e932d7a6cc9065cf2bbb4da588e74"}}