{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2018:BSHUDPFYPIB4TWQMRHUTBCOAZY","short_pith_number":"pith:BSHUDPFY","schema_version":"1.0","canonical_sha256":"0c8f41bcb87a03c9da0c89e93089c0ce1e877faf8f725993759bb0939fa21595","source":{"kind":"arxiv","id":"1802.05365","version":2},"attestation_state":"computed","paper":{"title":"Deep contextualized word representations","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Christopher Clark, Kenton Lee, Luke Zettlemoyer, Mark Neumann, Matt Gardner, Matthew E. Peters, Mohit Iyyer","submitted_at":"2018-02-15T00:05:11Z","abstract_excerpt":"We introduce a new type of deep contextualized word representation that models both (1) complex characteristics of word use (e.g., syntax and semantics), and (2) how these uses vary across linguistic contexts (i.e., to model polysemy). Our word vectors are learned functions of the internal states of a deep bidirectional language model (biLM), which is pre-trained on a large text corpus. We show that these representations can be easily added to existing models and significantly improve the state of the art across six challenging NLP problems, including question answering, textual entailment and"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1802.05365","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2018-02-15T00:05:11Z","cross_cats_sorted":[],"title_canon_sha256":"f62d3b587cfc80715fc27178081d8cbd8e8de938d15ea6e1580065f7c2a65003","abstract_canon_sha256":"446461ae73e206c47c1176670eba201c0e2b160abb31e8e6ec6f38b70a584b9c"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:20:21.746210Z","signature_b64":"tDo/vN5xCeCY8b7d0oTzfK6PMO15KqK8iJnml7k9pv3YUWa+URIu7pUNADFgJLEG51DTn6TIQ57sIFnJebJ3AA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"0c8f41bcb87a03c9da0c89e93089c0ce1e877faf8f725993759bb0939fa21595","last_reissued_at":"2026-05-18T00:20:21.745631Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:20:21.745631Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Deep contextualized word representations","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Christopher Clark, Kenton Lee, Luke Zettlemoyer, Mark Neumann, Matt Gardner, Matthew E. Peters, Mohit Iyyer","submitted_at":"2018-02-15T00:05:11Z","abstract_excerpt":"We introduce a new type of deep contextualized word representation that models both (1) complex characteristics of word use (e.g., syntax and semantics), and (2) how these uses vary across linguistic contexts (i.e., to model polysemy). Our word vectors are learned functions of the internal states of a deep bidirectional language model (biLM), which is pre-trained on a large text corpus. We show that these representations can be easily added to existing models and significantly improve the state of the art across six challenging NLP problems, including question answering, textual entailment and"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1802.05365","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1802.05365","created_at":"2026-05-18T00:20:21.745705+00:00"},{"alias_kind":"arxiv_version","alias_value":"1802.05365v2","created_at":"2026-05-18T00:20:21.745705+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1802.05365","created_at":"2026-05-18T00:20:21.745705+00:00"},{"alias_kind":"pith_short_12","alias_value":"BSHUDPFYPIB4","created_at":"2026-05-18T12:32:16.446611+00:00"},{"alias_kind":"pith_short_16","alias_value":"BSHUDPFYPIB4TWQM","created_at":"2026-05-18T12:32:16.446611+00:00"},{"alias_kind":"pith_short_8","alias_value":"BSHUDPFY","created_at":"2026-05-18T12:32:16.446611+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":39,"internal_anchor_count":27,"sample":[{"citing_arxiv_id":"1906.10256","citing_title":"Good Secretaries, Bad Truck Drivers? Occupational Gender Stereotypes in Sentiment Analysis","ref_index":28,"is_internal_anchor":true},{"citing_arxiv_id":"1906.11951","citing_title":"Supervise Thyself: Examining Self-Supervised Representations in Interactive Environments","ref_index":29,"is_internal_anchor":true},{"citing_arxiv_id":"1907.01686","citing_title":"Machine Reading Comprehension: a Literature Review","ref_index":35,"is_internal_anchor":true},{"citing_arxiv_id":"1907.02052","citing_title":"Patent Claim Generation by Fine-Tuning OpenAI GPT-2","ref_index":1,"is_internal_anchor":true},{"citing_arxiv_id":"1907.02581","citing_title":"Transfer Learning for Risk Classification of Social Media Posts: Model Evaluation Study","ref_index":14,"is_internal_anchor":true},{"citing_arxiv_id":"1907.03663","citing_title":"Knowledge-aware Pronoun Coreference Resolution","ref_index":15,"is_internal_anchor":true},{"citing_arxiv_id":"1907.07033","citing_title":"Neural Language Model Based Training Data Augmentation for Weakly Supervised Early Rumor Detection","ref_index":17,"is_internal_anchor":true},{"citing_arxiv_id":"1907.11512","citing_title":"Investigating Self-Attention Network for Chinese Word Segmentation","ref_index":8,"is_internal_anchor":true},{"citing_arxiv_id":"1907.11769","citing_title":"Automatically Learning Construction Injury Precursors from Text","ref_index":23,"is_internal_anchor":true},{"citing_arxiv_id":"2309.10305","citing_title":"Baichuan 2: Open Large-scale Language Models","ref_index":51,"is_internal_anchor":true},{"citing_arxiv_id":"2409.01633","citing_title":"SleepNet and DreamNet: Enriching and Reconstructing Representations for Consolidated Visual Classification","ref_index":33,"is_internal_anchor":true},{"citing_arxiv_id":"1909.06146","citing_title":"PubMedQA: A Dataset for Biomedical Research Question Answering","ref_index":42,"is_internal_anchor":true},{"citing_arxiv_id":"2003.00295","citing_title":"Adaptive Federated Optimization","ref_index":190,"is_internal_anchor":true},{"citing_arxiv_id":"2605.15104","citing_title":"From Text to Voice: A Reproducible and Verifiable Framework for Evaluating Tool Calling LLM Agents","ref_index":68,"is_internal_anchor":true},{"citing_arxiv_id":"2605.20254","citing_title":"Efficient Table QA via TableGrid Navigation and Progressive Inference Prompting","ref_index":15,"is_internal_anchor":true},{"citing_arxiv_id":"2605.18768","citing_title":"ClinQueryAgent: A Conversational Agent for Population Health Management","ref_index":170,"is_internal_anchor":true},{"citing_arxiv_id":"1906.08237","citing_title":"XLNet: Generalized Autoregressive Pretraining for Language Understanding","ref_index":27,"is_internal_anchor":true},{"citing_arxiv_id":"2102.01293","citing_title":"Scaling Laws for Transfer","ref_index":184,"is_internal_anchor":true},{"citing_arxiv_id":"1909.05858","citing_title":"CTRL: A Conditional Transformer Language Model for Controllable Generation","ref_index":35,"is_internal_anchor":true},{"citing_arxiv_id":"2309.16671","citing_title":"Demystifying CLIP Data","ref_index":105,"is_internal_anchor":true},{"citing_arxiv_id":"2402.17762","citing_title":"Massive Activations in Large Language Models","ref_index":71,"is_internal_anchor":true},{"citing_arxiv_id":"1908.10063","citing_title":"FinBERT: Financial Sentiment Analysis with Pre-trained Language Models","ref_index":24,"is_internal_anchor":true},{"citing_arxiv_id":"1904.05342","citing_title":"ClinicalBERT: Modeling Clinical Notes and Predicting Hospital Readmission","ref_index":24,"is_internal_anchor":true},{"citing_arxiv_id":"2009.08366","citing_title":"GraphCodeBERT: Pre-training Code Representations with Data Flow","ref_index":19,"is_internal_anchor":true},{"citing_arxiv_id":"2605.13858","citing_title":"A Hormone-inspired Emotion Layer for Transformer language models (HELT)","ref_index":55,"is_internal_anchor":true}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/BSHUDPFYPIB4TWQMRHUTBCOAZY","json":"https://pith.science/pith/BSHUDPFYPIB4TWQMRHUTBCOAZY.json","graph_json":"https://pith.science/api/pith-number/BSHUDPFYPIB4TWQMRHUTBCOAZY/graph.json","events_json":"https://pith.science/api/pith-number/BSHUDPFYPIB4TWQMRHUTBCOAZY/events.json","paper":"https://pith.science/paper/BSHUDPFY"},"agent_actions":{"view_html":"https://pith.science/pith/BSHUDPFYPIB4TWQMRHUTBCOAZY","download_json":"https://pith.science/pith/BSHUDPFYPIB4TWQMRHUTBCOAZY.json","view_paper":"https://pith.science/paper/BSHUDPFY","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1802.05365&json=true","fetch_graph":"https://pith.science/api/pith-number/BSHUDPFYPIB4TWQMRHUTBCOAZY/graph.json","fetch_events":"https://pith.science/api/pith-number/BSHUDPFYPIB4TWQMRHUTBCOAZY/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/BSHUDPFYPIB4TWQMRHUTBCOAZY/action/timestamp_anchor","attest_storage":"https://pith.science/pith/BSHUDPFYPIB4TWQMRHUTBCOAZY/action/storage_attestation","attest_author":"https://pith.science/pith/BSHUDPFYPIB4TWQMRHUTBCOAZY/action/author_attestation","sign_citation":"https://pith.science/pith/BSHUDPFYPIB4TWQMRHUTBCOAZY/action/citation_signature","submit_replication":"https://pith.science/pith/BSHUDPFYPIB4TWQMRHUTBCOAZY/action/replication_record"}},"created_at":"2026-05-18T00:20:21.745705+00:00","updated_at":"2026-05-18T00:20:21.745705+00:00"}