{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2018:K4CXM4ONN5ORIKW5XYUXDLLZBO","short_pith_number":"pith:K4CXM4ON","canonical_record":{"source":{"id":"1802.06893","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2018-02-19T22:32:47Z","cross_cats_sorted":["cs.LG"],"title_canon_sha256":"3e8807f50b9e928c95aef87aba41a54d9ae645e3d29c4d3381747f6c2984ee98","abstract_canon_sha256":"b9ec6b62acb5c1ad3cf19cfeec8c72a9627ea29f0dc1af1cb0060cb43f1d61ef"},"schema_version":"1.0"},"canonical_sha256":"57057671cd6f5d142addbe2971ad790b945320b41b800e8b38d2166c7b1e08e5","source":{"kind":"arxiv","id":"1802.06893","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1802.06893","created_at":"2026-05-18T00:19:53Z"},{"alias_kind":"arxiv_version","alias_value":"1802.06893v2","created_at":"2026-05-18T00:19:53Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1802.06893","created_at":"2026-05-18T00:19:53Z"},{"alias_kind":"pith_short_12","alias_value":"K4CXM4ONN5OR","created_at":"2026-05-18T12:32:33Z"},{"alias_kind":"pith_short_16","alias_value":"K4CXM4ONN5ORIKW5","created_at":"2026-05-18T12:32:33Z"},{"alias_kind":"pith_short_8","alias_value":"K4CXM4ON","created_at":"2026-05-18T12:32:33Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2018:K4CXM4ONN5ORIKW5XYUXDLLZBO","target":"record","payload":{"canonical_record":{"source":{"id":"1802.06893","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2018-02-19T22:32:47Z","cross_cats_sorted":["cs.LG"],"title_canon_sha256":"3e8807f50b9e928c95aef87aba41a54d9ae645e3d29c4d3381747f6c2984ee98","abstract_canon_sha256":"b9ec6b62acb5c1ad3cf19cfeec8c72a9627ea29f0dc1af1cb0060cb43f1d61ef"},"schema_version":"1.0"},"canonical_sha256":"57057671cd6f5d142addbe2971ad790b945320b41b800e8b38d2166c7b1e08e5","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:19:53.064213Z","signature_b64":"fUpcqT9mGClLCMDo6MQvkHPjlM/zDPVhrTS/OAS08JKb/V7mJtp0tcqDT6Eh16x5jDuaXZdGDkttpEqLUxYMDQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"57057671cd6f5d142addbe2971ad790b945320b41b800e8b38d2166c7b1e08e5","last_reissued_at":"2026-05-18T00:19:53.063430Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:19:53.063430Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1802.06893","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:19:53Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"PLahwU2XfbY4xD2WdzzK7eFDqDykueCW+H3PdPoik+Oldhtx/DAvLJHbbjmzfT7pN4+7PJAT+AHYGGDeq1ueBg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-26T22:33:29.133134Z"},"content_sha256":"82dede85b8aecfdaa3eafa69a7ac911b8e173957416ed8245efba06eca4dd17c","schema_version":"1.0","event_id":"sha256:82dede85b8aecfdaa3eafa69a7ac911b8e173957416ed8245efba06eca4dd17c"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2018:K4CXM4ONN5ORIKW5XYUXDLLZBO","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Learning Word Vectors for 157 Languages","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.LG"],"primary_cat":"cs.CL","authors_text":"Armand Joulin, Edouard Grave, Piotr Bojanowski, Prakhar Gupta, Tomas Mikolov","submitted_at":"2018-02-19T22:32:47Z","abstract_excerpt":"Distributed word representations, or word vectors, have recently been applied to many tasks in natural language processing, leading to state-of-the-art performance. A key ingredient to the successful application of these representations is to train them on very large corpora, and use these pre-trained models in downstream tasks. In this paper, we describe how we trained such high quality word representations for 157 languages. We used two sources of data to train these models: the free online encyclopedia Wikipedia and data from the common crawl project. We also introduce three new word analog"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1802.06893","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:19:53Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"dOCuLRlcfPw5s3UfNgl2PBgLHjD8RElC11unrQR6SoJQEHubOKsRre1K5LUMy/tFfHJ2JGH33FxRTprMNlL1Cg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-26T22:33:29.133794Z"},"content_sha256":"f5565c89eadc0fa669fab3498ee6429292a1dc97060bb1af861ffb0d9959ebfb","schema_version":"1.0","event_id":"sha256:f5565c89eadc0fa669fab3498ee6429292a1dc97060bb1af861ffb0d9959ebfb"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/K4CXM4ONN5ORIKW5XYUXDLLZBO/bundle.json","state_url":"https://pith.science/pith/K4CXM4ONN5ORIKW5XYUXDLLZBO/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/K4CXM4ONN5ORIKW5XYUXDLLZBO/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-26T22:33:29Z","links":{"resolver":"https://pith.science/pith/K4CXM4ONN5ORIKW5XYUXDLLZBO","bundle":"https://pith.science/pith/K4CXM4ONN5ORIKW5XYUXDLLZBO/bundle.json","state":"https://pith.science/pith/K4CXM4ONN5ORIKW5XYUXDLLZBO/state.json","well_known_bundle":"https://pith.science/.well-known/pith/K4CXM4ONN5ORIKW5XYUXDLLZBO/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2018:K4CXM4ONN5ORIKW5XYUXDLLZBO","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"b9ec6b62acb5c1ad3cf19cfeec8c72a9627ea29f0dc1af1cb0060cb43f1d61ef","cross_cats_sorted":["cs.LG"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2018-02-19T22:32:47Z","title_canon_sha256":"3e8807f50b9e928c95aef87aba41a54d9ae645e3d29c4d3381747f6c2984ee98"},"schema_version":"1.0","source":{"id":"1802.06893","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1802.06893","created_at":"2026-05-18T00:19:53Z"},{"alias_kind":"arxiv_version","alias_value":"1802.06893v2","created_at":"2026-05-18T00:19:53Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1802.06893","created_at":"2026-05-18T00:19:53Z"},{"alias_kind":"pith_short_12","alias_value":"K4CXM4ONN5OR","created_at":"2026-05-18T12:32:33Z"},{"alias_kind":"pith_short_16","alias_value":"K4CXM4ONN5ORIKW5","created_at":"2026-05-18T12:32:33Z"},{"alias_kind":"pith_short_8","alias_value":"K4CXM4ON","created_at":"2026-05-18T12:32:33Z"}],"graph_snapshots":[{"event_id":"sha256:f5565c89eadc0fa669fab3498ee6429292a1dc97060bb1af861ffb0d9959ebfb","target":"graph","created_at":"2026-05-18T00:19:53Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Distributed word representations, or word vectors, have recently been applied to many tasks in natural language processing, leading to state-of-the-art performance. A key ingredient to the successful application of these representations is to train them on very large corpora, and use these pre-trained models in downstream tasks. In this paper, we describe how we trained such high quality word representations for 157 languages. We used two sources of data to train these models: the free online encyclopedia Wikipedia and data from the common crawl project. We also introduce three new word analog","authors_text":"Armand Joulin, Edouard Grave, Piotr Bojanowski, Prakhar Gupta, Tomas Mikolov","cross_cats":["cs.LG"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2018-02-19T22:32:47Z","title":"Learning Word Vectors for 157 Languages"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1802.06893","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:82dede85b8aecfdaa3eafa69a7ac911b8e173957416ed8245efba06eca4dd17c","target":"record","created_at":"2026-05-18T00:19:53Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"b9ec6b62acb5c1ad3cf19cfeec8c72a9627ea29f0dc1af1cb0060cb43f1d61ef","cross_cats_sorted":["cs.LG"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2018-02-19T22:32:47Z","title_canon_sha256":"3e8807f50b9e928c95aef87aba41a54d9ae645e3d29c4d3381747f6c2984ee98"},"schema_version":"1.0","source":{"id":"1802.06893","kind":"arxiv","version":2}},"canonical_sha256":"57057671cd6f5d142addbe2971ad790b945320b41b800e8b38d2166c7b1e08e5","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"57057671cd6f5d142addbe2971ad790b945320b41b800e8b38d2166c7b1e08e5","first_computed_at":"2026-05-18T00:19:53.063430Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:19:53.063430Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"fUpcqT9mGClLCMDo6MQvkHPjlM/zDPVhrTS/OAS08JKb/V7mJtp0tcqDT6Eh16x5jDuaXZdGDkttpEqLUxYMDQ==","signature_status":"signed_v1","signed_at":"2026-05-18T00:19:53.064213Z","signed_message":"canonical_sha256_bytes"},"source_id":"1802.06893","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:82dede85b8aecfdaa3eafa69a7ac911b8e173957416ed8245efba06eca4dd17c","sha256:f5565c89eadc0fa669fab3498ee6429292a1dc97060bb1af861ffb0d9959ebfb"],"state_sha256":"0abb432bba7941bf5b89ce1e3c69d88b9d9b5468cbe0b5dadced5ac8dbd149f3"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"Mm9Y2F8EdxpAUILGn9QFADqmyNNhLh5auhGou/gPNXbFFm2Wt8QZXE7fpwWSraBclZrMLMuM/WNmxcPkACCFDA==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-26T22:33:29.137430Z","bundle_sha256":"956b00f4627c0802b0efffcb7b8639f2fdf87b5296315617dadb60d487a304ab"}}