{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2018:I4POZDC2D4MPOCHTQGYNZ3HYYX","short_pith_number":"pith:I4POZDC2","schema_version":"1.0","canonical_sha256":"471eec8c5a1f18f708f381b0dcecf8c5d8ca2fa47dbc9c502594103d511d274e","source":{"kind":"arxiv","id":"1810.06306","version":1},"attestation_state":"computed","paper":{"title":"Improving Topic Models with Latent Feature Word Representations","license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","headline":"","cross_cats":["cs.IR","cs.LG"],"primary_cat":"cs.CL","authors_text":"Dat Quoc Nguyen, Lan Du, Mark Johnson, Richard Billingsley","submitted_at":"2018-10-15T12:34:05Z","abstract_excerpt":"Probabilistic topic models are widely used to discover latent topics in document collections, while latent feature vector representations of words have been used to obtain high performance in many NLP tasks. In this paper, we extend two different Dirichlet multinomial topic models by incorporating latent feature vector representations of words trained on very large corpora to improve the word-topic mapping learnt on a smaller corpus. Experimental results show that by using information from the external corpora, our new models produce significant improvements on topic coherence, document cluste"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1810.06306","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","primary_cat":"cs.CL","submitted_at":"2018-10-15T12:34:05Z","cross_cats_sorted":["cs.IR","cs.LG"],"title_canon_sha256":"470653fc6656227e86ea8ea2bdeb037907a68deec35fea41ef732860167a56d2","abstract_canon_sha256":"2c7c2d8473097dde5a5dc5a75a75872e37399b8fae5e857fd1f8dd8dc451e1e6"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:03:20.869403Z","signature_b64":"HfJnVZ1EkDjCxjU+CZ+Di2jh24mtym3DMoGs3b9k11o+lJ6inbU9TSQOZTVTZgiJkiK8OzK8qsGY1zA9QpCXAQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"471eec8c5a1f18f708f381b0dcecf8c5d8ca2fa47dbc9c502594103d511d274e","last_reissued_at":"2026-05-18T00:03:20.868935Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:03:20.868935Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Improving Topic Models with Latent Feature Word Representations","license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","headline":"","cross_cats":["cs.IR","cs.LG"],"primary_cat":"cs.CL","authors_text":"Dat Quoc Nguyen, Lan Du, Mark Johnson, Richard Billingsley","submitted_at":"2018-10-15T12:34:05Z","abstract_excerpt":"Probabilistic topic models are widely used to discover latent topics in document collections, while latent feature vector representations of words have been used to obtain high performance in many NLP tasks. In this paper, we extend two different Dirichlet multinomial topic models by incorporating latent feature vector representations of words trained on very large corpora to improve the word-topic mapping learnt on a smaller corpus. Experimental results show that by using information from the external corpora, our new models produce significant improvements on topic coherence, document cluste"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1810.06306","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1810.06306","created_at":"2026-05-18T00:03:20.869001+00:00"},{"alias_kind":"arxiv_version","alias_value":"1810.06306v1","created_at":"2026-05-18T00:03:20.869001+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1810.06306","created_at":"2026-05-18T00:03:20.869001+00:00"},{"alias_kind":"pith_short_12","alias_value":"I4POZDC2D4MP","created_at":"2026-05-18T12:32:28.185984+00:00"},{"alias_kind":"pith_short_16","alias_value":"I4POZDC2D4MPOCHT","created_at":"2026-05-18T12:32:28.185984+00:00"},{"alias_kind":"pith_short_8","alias_value":"I4POZDC2","created_at":"2026-05-18T12:32:28.185984+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/I4POZDC2D4MPOCHTQGYNZ3HYYX","json":"https://pith.science/pith/I4POZDC2D4MPOCHTQGYNZ3HYYX.json","graph_json":"https://pith.science/api/pith-number/I4POZDC2D4MPOCHTQGYNZ3HYYX/graph.json","events_json":"https://pith.science/api/pith-number/I4POZDC2D4MPOCHTQGYNZ3HYYX/events.json","paper":"https://pith.science/paper/I4POZDC2"},"agent_actions":{"view_html":"https://pith.science/pith/I4POZDC2D4MPOCHTQGYNZ3HYYX","download_json":"https://pith.science/pith/I4POZDC2D4MPOCHTQGYNZ3HYYX.json","view_paper":"https://pith.science/paper/I4POZDC2","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1810.06306&json=true","fetch_graph":"https://pith.science/api/pith-number/I4POZDC2D4MPOCHTQGYNZ3HYYX/graph.json","fetch_events":"https://pith.science/api/pith-number/I4POZDC2D4MPOCHTQGYNZ3HYYX/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/I4POZDC2D4MPOCHTQGYNZ3HYYX/action/timestamp_anchor","attest_storage":"https://pith.science/pith/I4POZDC2D4MPOCHTQGYNZ3HYYX/action/storage_attestation","attest_author":"https://pith.science/pith/I4POZDC2D4MPOCHTQGYNZ3HYYX/action/author_attestation","sign_citation":"https://pith.science/pith/I4POZDC2D4MPOCHTQGYNZ3HYYX/action/citation_signature","submit_replication":"https://pith.science/pith/I4POZDC2D4MPOCHTQGYNZ3HYYX/action/replication_record"}},"created_at":"2026-05-18T00:03:20.869001+00:00","updated_at":"2026-05-18T00:03:20.869001+00:00"}