{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2021:ZLMQXMVPQC2QWRLUP5RH353WQW","short_pith_number":"pith:ZLMQXMVP","schema_version":"1.0","canonical_sha256":"cad90bb2af80b50b45747f627df77685a2a0f469b44e90fc121e94dcef45d6be","source":{"kind":"arxiv","id":"2103.09813","version":1},"attestation_state":"computed","paper":{"title":"Do Word Embeddings Really Understand Loughran-McDonald's Polarities?","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["q-fin.CP"],"primary_cat":"q-fin.ST","authors_text":"Charles-Albert Lehalle, Mengda Li","submitted_at":"2021-03-17T17:57:39Z","abstract_excerpt":"In this paper we perform a rigorous mathematical analysis of the word2vec model, especially when it is equipped with the Skip-gram learning scheme. Our goal is to explain how embeddings, that are now widely used in NLP (Natural Language Processing), are influenced by the distribution of terms in the documents of the considered corpus. We use a mathematical formulation to shed light on how the decision to use such a model makes implicit assumptions on the structure of the language. We show how Markovian assumptions, that we discuss, lead to a very clear theoretical understanding of the formatio"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2103.09813","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"q-fin.ST","submitted_at":"2021-03-17T17:57:39Z","cross_cats_sorted":["q-fin.CP"],"title_canon_sha256":"4320b8f0a263c71b738b8950ddd1adb884f4b3fa149b1846236b890a04f145b5","abstract_canon_sha256":"1eb6d250cc45681b73173aaeff4653455cf87a3c3366fc7326483a9faf447fff"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-26T01:03:06.995668Z","signature_b64":"ak89KPdKy6lTtt02RJ88qtHoMQ6a7Id1/FINVQkYXb+o5I8RnX84Z3EP7siOJykzlPLKuP2ya7pxMJCIoqqOCg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"cad90bb2af80b50b45747f627df77685a2a0f469b44e90fc121e94dcef45d6be","last_reissued_at":"2026-05-26T01:03:06.995113Z","signature_status":"signed_v1","first_computed_at":"2026-05-26T01:03:06.995113Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Do Word Embeddings Really Understand Loughran-McDonald's Polarities?","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["q-fin.CP"],"primary_cat":"q-fin.ST","authors_text":"Charles-Albert Lehalle, Mengda Li","submitted_at":"2021-03-17T17:57:39Z","abstract_excerpt":"In this paper we perform a rigorous mathematical analysis of the word2vec model, especially when it is equipped with the Skip-gram learning scheme. Our goal is to explain how embeddings, that are now widely used in NLP (Natural Language Processing), are influenced by the distribution of terms in the documents of the considered corpus. We use a mathematical formulation to shed light on how the decision to use such a model makes implicit assumptions on the structure of the language. We show how Markovian assumptions, that we discuss, lead to a very clear theoretical understanding of the formatio"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2103.09813","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2103.09813/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2103.09813","created_at":"2026-05-26T01:03:06.995195+00:00"},{"alias_kind":"arxiv_version","alias_value":"2103.09813v1","created_at":"2026-05-26T01:03:06.995195+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2103.09813","created_at":"2026-05-26T01:03:06.995195+00:00"},{"alias_kind":"pith_short_12","alias_value":"ZLMQXMVPQC2Q","created_at":"2026-05-26T01:03:06.995195+00:00"},{"alias_kind":"pith_short_16","alias_value":"ZLMQXMVPQC2QWRLU","created_at":"2026-05-26T01:03:06.995195+00:00"},{"alias_kind":"pith_short_8","alias_value":"ZLMQXMVP","created_at":"2026-05-26T01:03:06.995195+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/ZLMQXMVPQC2QWRLUP5RH353WQW","json":"https://pith.science/pith/ZLMQXMVPQC2QWRLUP5RH353WQW.json","graph_json":"https://pith.science/api/pith-number/ZLMQXMVPQC2QWRLUP5RH353WQW/graph.json","events_json":"https://pith.science/api/pith-number/ZLMQXMVPQC2QWRLUP5RH353WQW/events.json","paper":"https://pith.science/paper/ZLMQXMVP"},"agent_actions":{"view_html":"https://pith.science/pith/ZLMQXMVPQC2QWRLUP5RH353WQW","download_json":"https://pith.science/pith/ZLMQXMVPQC2QWRLUP5RH353WQW.json","view_paper":"https://pith.science/paper/ZLMQXMVP","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2103.09813&json=true","fetch_graph":"https://pith.science/api/pith-number/ZLMQXMVPQC2QWRLUP5RH353WQW/graph.json","fetch_events":"https://pith.science/api/pith-number/ZLMQXMVPQC2QWRLUP5RH353WQW/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/ZLMQXMVPQC2QWRLUP5RH353WQW/action/timestamp_anchor","attest_storage":"https://pith.science/pith/ZLMQXMVPQC2QWRLUP5RH353WQW/action/storage_attestation","attest_author":"https://pith.science/pith/ZLMQXMVPQC2QWRLUP5RH353WQW/action/author_attestation","sign_citation":"https://pith.science/pith/ZLMQXMVPQC2QWRLUP5RH353WQW/action/citation_signature","submit_replication":"https://pith.science/pith/ZLMQXMVPQC2QWRLUP5RH353WQW/action/replication_record"}},"created_at":"2026-05-26T01:03:06.995195+00:00","updated_at":"2026-05-26T01:03:06.995195+00:00"}