{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2018:62UE5YT2YTUGYZTMLZJRE5K2AY","short_pith_number":"pith:62UE5YT2","schema_version":"1.0","canonical_sha256":"f6a84ee27ac4e86c666c5e5312755a061a594a5397ee5217a4adf027e0b03cf3","source":{"kind":"arxiv","id":"1811.03866","version":1},"attestation_state":"computed","paper":{"title":"Learning Semantic Representations for Novel Words: Leveraging Both Form and Context","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.CL","authors_text":"Hinrich Sch\\\"utze, Timo Schick","submitted_at":"2018-11-09T11:44:05Z","abstract_excerpt":"Word embeddings are a key component of high-performing natural language processing (NLP) systems, but it remains a challenge to learn good representations for novel words on the fly, i.e., for words that did not occur in the training data. The general problem setting is that word embeddings are induced on an unlabeled training corpus and then a model is trained that embeds novel words into this induced embedding space. Currently, two approaches for learning embeddings of novel words exist: (i) learning an embedding from the novel word's surface-form (e.g., subword n-grams) and (ii) learning an"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1811.03866","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2018-11-09T11:44:05Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"b3eb3ba95b17ca87b23940bba7ab5e7f365f86b8472c756504f63296220b7ad6","abstract_canon_sha256":"548acc4430759c8722f53cbcf339f034120869efbafacf3fbc38389e62140b43"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:01:11.760544Z","signature_b64":"0/RVCkoWXDAb8VTRV7lCVD5dYBIzrp39FFLY98NwUdhOF9xlXIN2Q0HipkzAlak3OhP4pkN9p8o9uFnUVEfEDg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"f6a84ee27ac4e86c666c5e5312755a061a594a5397ee5217a4adf027e0b03cf3","last_reissued_at":"2026-05-18T00:01:11.759750Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:01:11.759750Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Learning Semantic Representations for Novel Words: Leveraging Both Form and Context","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.CL","authors_text":"Hinrich Sch\\\"utze, Timo Schick","submitted_at":"2018-11-09T11:44:05Z","abstract_excerpt":"Word embeddings are a key component of high-performing natural language processing (NLP) systems, but it remains a challenge to learn good representations for novel words on the fly, i.e., for words that did not occur in the training data. The general problem setting is that word embeddings are induced on an unlabeled training corpus and then a model is trained that embeds novel words into this induced embedding space. Currently, two approaches for learning embeddings of novel words exist: (i) learning an embedding from the novel word's surface-form (e.g., subword n-grams) and (ii) learning an"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1811.03866","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1811.03866","created_at":"2026-05-18T00:01:11.759854+00:00"},{"alias_kind":"arxiv_version","alias_value":"1811.03866v1","created_at":"2026-05-18T00:01:11.759854+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1811.03866","created_at":"2026-05-18T00:01:11.759854+00:00"},{"alias_kind":"pith_short_12","alias_value":"62UE5YT2YTUG","created_at":"2026-05-18T12:32:08.215937+00:00"},{"alias_kind":"pith_short_16","alias_value":"62UE5YT2YTUGYZTM","created_at":"2026-05-18T12:32:08.215937+00:00"},{"alias_kind":"pith_short_8","alias_value":"62UE5YT2","created_at":"2026-05-18T12:32:08.215937+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/62UE5YT2YTUGYZTMLZJRE5K2AY","json":"https://pith.science/pith/62UE5YT2YTUGYZTMLZJRE5K2AY.json","graph_json":"https://pith.science/api/pith-number/62UE5YT2YTUGYZTMLZJRE5K2AY/graph.json","events_json":"https://pith.science/api/pith-number/62UE5YT2YTUGYZTMLZJRE5K2AY/events.json","paper":"https://pith.science/paper/62UE5YT2"},"agent_actions":{"view_html":"https://pith.science/pith/62UE5YT2YTUGYZTMLZJRE5K2AY","download_json":"https://pith.science/pith/62UE5YT2YTUGYZTMLZJRE5K2AY.json","view_paper":"https://pith.science/paper/62UE5YT2","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1811.03866&json=true","fetch_graph":"https://pith.science/api/pith-number/62UE5YT2YTUGYZTMLZJRE5K2AY/graph.json","fetch_events":"https://pith.science/api/pith-number/62UE5YT2YTUGYZTMLZJRE5K2AY/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/62UE5YT2YTUGYZTMLZJRE5K2AY/action/timestamp_anchor","attest_storage":"https://pith.science/pith/62UE5YT2YTUGYZTMLZJRE5K2AY/action/storage_attestation","attest_author":"https://pith.science/pith/62UE5YT2YTUGYZTMLZJRE5K2AY/action/author_attestation","sign_citation":"https://pith.science/pith/62UE5YT2YTUGYZTMLZJRE5K2AY/action/citation_signature","submit_replication":"https://pith.science/pith/62UE5YT2YTUGYZTMLZJRE5K2AY/action/replication_record"}},"created_at":"2026-05-18T00:01:11.759854+00:00","updated_at":"2026-05-18T00:01:11.759854+00:00"}