{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2013:JMKHWIW7XBVVY555EB3D4KUU7Y","short_pith_number":"pith:JMKHWIW7","canonical_record":{"source":{"id":"1307.1662","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2013-07-05T16:52:09Z","cross_cats_sorted":["cs.LG"],"title_canon_sha256":"2cd17b0bec4368d7feedb61b3c4dfeed2cab6bc95c79118cc36e83262f4cf8ce","abstract_canon_sha256":"a4e0ffbac27d7f6ca3b461967e8310e9bc79ed83e6f7039e67f24272ccce5cba"},"schema_version":"1.0"},"canonical_sha256":"4b147b22dfb86b5c77bd20763e2a94fe3a736bcc7308e8a8ce49794569304d09","source":{"kind":"arxiv","id":"1307.1662","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1307.1662","created_at":"2026-05-18T02:48:54Z"},{"alias_kind":"arxiv_version","alias_value":"1307.1662v2","created_at":"2026-05-18T02:48:54Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1307.1662","created_at":"2026-05-18T02:48:54Z"},{"alias_kind":"pith_short_12","alias_value":"JMKHWIW7XBVV","created_at":"2026-05-18T12:27:49Z"},{"alias_kind":"pith_short_16","alias_value":"JMKHWIW7XBVVY555","created_at":"2026-05-18T12:27:49Z"},{"alias_kind":"pith_short_8","alias_value":"JMKHWIW7","created_at":"2026-05-18T12:27:49Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2013:JMKHWIW7XBVVY555EB3D4KUU7Y","target":"record","payload":{"canonical_record":{"source":{"id":"1307.1662","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2013-07-05T16:52:09Z","cross_cats_sorted":["cs.LG"],"title_canon_sha256":"2cd17b0bec4368d7feedb61b3c4dfeed2cab6bc95c79118cc36e83262f4cf8ce","abstract_canon_sha256":"a4e0ffbac27d7f6ca3b461967e8310e9bc79ed83e6f7039e67f24272ccce5cba"},"schema_version":"1.0"},"canonical_sha256":"4b147b22dfb86b5c77bd20763e2a94fe3a736bcc7308e8a8ce49794569304d09","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T02:48:54.972293Z","signature_b64":"phEHXKx0hln1O5/JZs0WcZ/DBcUb2jLZWk1AxI7zn5QM9sUeqljLxJi20GUPt6uGZ5G2yz0BPxUi1DL/dtgbBA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"4b147b22dfb86b5c77bd20763e2a94fe3a736bcc7308e8a8ce49794569304d09","last_reissued_at":"2026-05-18T02:48:54.971747Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T02:48:54.971747Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1307.1662","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T02:48:54Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"0RB6dkUtszu/JIhIqynu0ABN2kiDfLs153pdoatdV1ovJHLGhfhcTfM2idWbItMbvW9jVHfK5DqEe6qjY/VMAw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-25T20:16:56.056648Z"},"content_sha256":"3ecd118f5ce8eca15a61febe5472d4697d109bfa8e916a98d3b1a50a17aa7e76","schema_version":"1.0","event_id":"sha256:3ecd118f5ce8eca15a61febe5472d4697d109bfa8e916a98d3b1a50a17aa7e76"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2013:JMKHWIW7XBVVY555EB3D4KUU7Y","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Polyglot: Distributed Word Representations for Multilingual NLP","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.LG"],"primary_cat":"cs.CL","authors_text":"Bryan Perozzi, Rami Al-Rfou, Steven Skiena","submitted_at":"2013-07-05T16:52:09Z","abstract_excerpt":"Distributed word representations (word embeddings) have recently contributed to competitive performance in language modeling and several NLP tasks. In this work, we train word embeddings for more than 100 languages using their corresponding Wikipedias. We quantitatively demonstrate the utility of our word embeddings by using them as the sole features for training a part of speech tagger for a subset of these languages. We find their performance to be competitive with near state-of-art methods in English, Danish and Swedish. Moreover, we investigate the semantic features captured by these embed"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1307.1662","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T02:48:54Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"ZrIwYwBnIEJP4+hYuYgYvDOB+gCRYZ45+4OmeMm64XNdaj73ydAm6nzfxXdgic8kmUV1BHPnyIDr3gFUHe3TDQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-25T20:16:56.057294Z"},"content_sha256":"65152018fa122e3140dc2645445b45b5124c5e2856e0a1ec911f8a4350c1aafa","schema_version":"1.0","event_id":"sha256:65152018fa122e3140dc2645445b45b5124c5e2856e0a1ec911f8a4350c1aafa"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/JMKHWIW7XBVVY555EB3D4KUU7Y/bundle.json","state_url":"https://pith.science/pith/JMKHWIW7XBVVY555EB3D4KUU7Y/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/JMKHWIW7XBVVY555EB3D4KUU7Y/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-25T20:16:56Z","links":{"resolver":"https://pith.science/pith/JMKHWIW7XBVVY555EB3D4KUU7Y","bundle":"https://pith.science/pith/JMKHWIW7XBVVY555EB3D4KUU7Y/bundle.json","state":"https://pith.science/pith/JMKHWIW7XBVVY555EB3D4KUU7Y/state.json","well_known_bundle":"https://pith.science/.well-known/pith/JMKHWIW7XBVVY555EB3D4KUU7Y/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2013:JMKHWIW7XBVVY555EB3D4KUU7Y","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"a4e0ffbac27d7f6ca3b461967e8310e9bc79ed83e6f7039e67f24272ccce5cba","cross_cats_sorted":["cs.LG"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2013-07-05T16:52:09Z","title_canon_sha256":"2cd17b0bec4368d7feedb61b3c4dfeed2cab6bc95c79118cc36e83262f4cf8ce"},"schema_version":"1.0","source":{"id":"1307.1662","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1307.1662","created_at":"2026-05-18T02:48:54Z"},{"alias_kind":"arxiv_version","alias_value":"1307.1662v2","created_at":"2026-05-18T02:48:54Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1307.1662","created_at":"2026-05-18T02:48:54Z"},{"alias_kind":"pith_short_12","alias_value":"JMKHWIW7XBVV","created_at":"2026-05-18T12:27:49Z"},{"alias_kind":"pith_short_16","alias_value":"JMKHWIW7XBVVY555","created_at":"2026-05-18T12:27:49Z"},{"alias_kind":"pith_short_8","alias_value":"JMKHWIW7","created_at":"2026-05-18T12:27:49Z"}],"graph_snapshots":[{"event_id":"sha256:65152018fa122e3140dc2645445b45b5124c5e2856e0a1ec911f8a4350c1aafa","target":"graph","created_at":"2026-05-18T02:48:54Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Distributed word representations (word embeddings) have recently contributed to competitive performance in language modeling and several NLP tasks. In this work, we train word embeddings for more than 100 languages using their corresponding Wikipedias. We quantitatively demonstrate the utility of our word embeddings by using them as the sole features for training a part of speech tagger for a subset of these languages. We find their performance to be competitive with near state-of-art methods in English, Danish and Swedish. Moreover, we investigate the semantic features captured by these embed","authors_text":"Bryan Perozzi, Rami Al-Rfou, Steven Skiena","cross_cats":["cs.LG"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2013-07-05T16:52:09Z","title":"Polyglot: Distributed Word Representations for Multilingual NLP"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1307.1662","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:3ecd118f5ce8eca15a61febe5472d4697d109bfa8e916a98d3b1a50a17aa7e76","target":"record","created_at":"2026-05-18T02:48:54Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"a4e0ffbac27d7f6ca3b461967e8310e9bc79ed83e6f7039e67f24272ccce5cba","cross_cats_sorted":["cs.LG"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2013-07-05T16:52:09Z","title_canon_sha256":"2cd17b0bec4368d7feedb61b3c4dfeed2cab6bc95c79118cc36e83262f4cf8ce"},"schema_version":"1.0","source":{"id":"1307.1662","kind":"arxiv","version":2}},"canonical_sha256":"4b147b22dfb86b5c77bd20763e2a94fe3a736bcc7308e8a8ce49794569304d09","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"4b147b22dfb86b5c77bd20763e2a94fe3a736bcc7308e8a8ce49794569304d09","first_computed_at":"2026-05-18T02:48:54.971747Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T02:48:54.971747Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"phEHXKx0hln1O5/JZs0WcZ/DBcUb2jLZWk1AxI7zn5QM9sUeqljLxJi20GUPt6uGZ5G2yz0BPxUi1DL/dtgbBA==","signature_status":"signed_v1","signed_at":"2026-05-18T02:48:54.972293Z","signed_message":"canonical_sha256_bytes"},"source_id":"1307.1662","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:3ecd118f5ce8eca15a61febe5472d4697d109bfa8e916a98d3b1a50a17aa7e76","sha256:65152018fa122e3140dc2645445b45b5124c5e2856e0a1ec911f8a4350c1aafa"],"state_sha256":"95f56284c3ddbc34eb8c847144cc9f838e3a97335325e8a68ca2ea7c860ab8e3"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"K9EharlptpnR3Otoq7QfBDpm0HiSuebWCQHozzWoVlvmGmjohcb31SG1oTk8Rb4ubOaVHNLkZDKf9GKYQCOLAw==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-25T20:16:56.061339Z","bundle_sha256":"8150d79f5f15d21f7b5cc7310dd84d35699e1e4553ba89ab3fb567d5b7d07fdf"}}