{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2014:2PHN6JN4QR4KLWWMX22UNVAVOF","short_pith_number":"pith:2PHN6JN4","schema_version":"1.0","canonical_sha256":"d3cedf25bc8478a5daccbeb546d41571616403ddf49b89b82d5c3fc20ce04fe7","source":{"kind":"arxiv","id":"1402.3722","version":1},"attestation_state":"computed","paper":{"title":"word2vec Explained: deriving Mikolov et al.'s negative-sampling word-embedding method","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.LG","stat.ML"],"primary_cat":"cs.CL","authors_text":"Omer Levy, Yoav Goldberg","submitted_at":"2014-02-15T21:03:02Z","abstract_excerpt":"The word2vec software of Tomas Mikolov and colleagues (https://code.google.com/p/word2vec/ ) has gained a lot of traction lately, and provides state-of-the-art word embeddings. The learning models behind the software are described in two research papers. We found the description of the models in these papers to be somewhat cryptic and hard to follow. While the motivations and presentation may be obvious to the neural-networks language-modeling crowd, we had to struggle quite a bit to figure out the rationale behind the equations.\n  This note is an attempt to explain equation (4) (negative samp"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1402.3722","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2014-02-15T21:03:02Z","cross_cats_sorted":["cs.LG","stat.ML"],"title_canon_sha256":"581a9920617def6fac4210fcb56e23106f130c1509caa0779190bc6b7a5d4ee9","abstract_canon_sha256":"1d33241462c2f1e5003141f9646b7bbcf423b6672c5550c73b4e75c328d9fb20"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T02:58:55.743078Z","signature_b64":"b9MQhhpMFIuTO1Q4xsG967gb8ODoe9GnpFqqETKJRAfoXDp0TOZl0/IsOoCUyKvn6rnNelB0JgTNbyZgcxIzDQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"d3cedf25bc8478a5daccbeb546d41571616403ddf49b89b82d5c3fc20ce04fe7","last_reissued_at":"2026-05-18T02:58:55.742568Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T02:58:55.742568Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"word2vec Explained: deriving Mikolov et al.'s negative-sampling word-embedding method","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.LG","stat.ML"],"primary_cat":"cs.CL","authors_text":"Omer Levy, Yoav Goldberg","submitted_at":"2014-02-15T21:03:02Z","abstract_excerpt":"The word2vec software of Tomas Mikolov and colleagues (https://code.google.com/p/word2vec/ ) has gained a lot of traction lately, and provides state-of-the-art word embeddings. The learning models behind the software are described in two research papers. We found the description of the models in these papers to be somewhat cryptic and hard to follow. While the motivations and presentation may be obvious to the neural-networks language-modeling crowd, we had to struggle quite a bit to figure out the rationale behind the equations.\n  This note is an attempt to explain equation (4) (negative samp"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1402.3722","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1402.3722","created_at":"2026-05-18T02:58:55.742648+00:00"},{"alias_kind":"arxiv_version","alias_value":"1402.3722v1","created_at":"2026-05-18T02:58:55.742648+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1402.3722","created_at":"2026-05-18T02:58:55.742648+00:00"},{"alias_kind":"pith_short_12","alias_value":"2PHN6JN4QR4K","created_at":"2026-05-18T12:28:11.866339+00:00"},{"alias_kind":"pith_short_16","alias_value":"2PHN6JN4QR4KLWWM","created_at":"2026-05-18T12:28:11.866339+00:00"},{"alias_kind":"pith_short_8","alias_value":"2PHN6JN4","created_at":"2026-05-18T12:28:11.866339+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":8,"internal_anchor_count":6,"sample":[{"citing_arxiv_id":"1906.09068","citing_title":"Simplex2Vec embeddings for community detection in simplicial complexes","ref_index":47,"is_internal_anchor":true},{"citing_arxiv_id":"1907.05340","citing_title":"Neural or Statistical: An Empirical Study on Language Models for Chinese Input Recommendation on Mobile","ref_index":7,"is_internal_anchor":true},{"citing_arxiv_id":"1907.07766","citing_title":"Flatter is better: Percentile Transformations for Recommender Systems","ref_index":3,"is_internal_anchor":true},{"citing_arxiv_id":"1907.11031","citing_title":"Not All Bugs Are the Same: Understanding, Characterizing, and Classifying the Root Cause of Bugs","ref_index":35,"is_internal_anchor":true},{"citing_arxiv_id":"2401.03717","citing_title":"Universal Time-Series Representation Learning: A Survey","ref_index":71,"is_internal_anchor":true},{"citing_arxiv_id":"1909.01066","citing_title":"Language Models as Knowledge Bases?","ref_index":194,"is_internal_anchor":true},{"citing_arxiv_id":"2311.03658","citing_title":"The Linear Representation Hypothesis and the Geometry of Large Language Models","ref_index":6,"is_internal_anchor":false},{"citing_arxiv_id":"2605.06216","citing_title":"TIDE: Every Layer Knows the Token Beneath the Context","ref_index":63,"is_internal_anchor":false}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/2PHN6JN4QR4KLWWMX22UNVAVOF","json":"https://pith.science/pith/2PHN6JN4QR4KLWWMX22UNVAVOF.json","graph_json":"https://pith.science/api/pith-number/2PHN6JN4QR4KLWWMX22UNVAVOF/graph.json","events_json":"https://pith.science/api/pith-number/2PHN6JN4QR4KLWWMX22UNVAVOF/events.json","paper":"https://pith.science/paper/2PHN6JN4"},"agent_actions":{"view_html":"https://pith.science/pith/2PHN6JN4QR4KLWWMX22UNVAVOF","download_json":"https://pith.science/pith/2PHN6JN4QR4KLWWMX22UNVAVOF.json","view_paper":"https://pith.science/paper/2PHN6JN4","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1402.3722&json=true","fetch_graph":"https://pith.science/api/pith-number/2PHN6JN4QR4KLWWMX22UNVAVOF/graph.json","fetch_events":"https://pith.science/api/pith-number/2PHN6JN4QR4KLWWMX22UNVAVOF/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/2PHN6JN4QR4KLWWMX22UNVAVOF/action/timestamp_anchor","attest_storage":"https://pith.science/pith/2PHN6JN4QR4KLWWMX22UNVAVOF/action/storage_attestation","attest_author":"https://pith.science/pith/2PHN6JN4QR4KLWWMX22UNVAVOF/action/author_attestation","sign_citation":"https://pith.science/pith/2PHN6JN4QR4KLWWMX22UNVAVOF/action/citation_signature","submit_replication":"https://pith.science/pith/2PHN6JN4QR4KLWWMX22UNVAVOF/action/replication_record"}},"created_at":"2026-05-18T02:58:55.742648+00:00","updated_at":"2026-05-18T02:58:55.742648+00:00"}