{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2017:VG3E7SVYRZH5QICDAY62ZK3VSI","short_pith_number":"pith:VG3E7SVY","schema_version":"1.0","canonical_sha256":"a9b64fcab88e4fd82043063dacab759204d99f2c10520b7c882ab07b68471549","source":{"kind":"arxiv","id":"1705.07368","version":3},"attestation_state":"computed","paper":{"title":"Mixed Membership Word Embeddings for Computational Social Science","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","cs.LG"],"primary_cat":"cs.CL","authors_text":"James Foulds","submitted_at":"2017-05-20T23:45:54Z","abstract_excerpt":"Word embeddings improve the performance of NLP systems by revealing the hidden structural relationships between words. Despite their success in many applications, word embeddings have seen very little use in computational social science NLP tasks, presumably due to their reliance on big data, and to a lack of interpretability. I propose a probabilistic model-based word embedding method which can recover interpretable embeddings, without big data. The key insight is to leverage mixed membership modeling, in which global representations are shared, but individual entities (i.e. dictionary words)"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1705.07368","kind":"arxiv","version":3},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2017-05-20T23:45:54Z","cross_cats_sorted":["cs.AI","cs.LG"],"title_canon_sha256":"0fd8aec34d4109061d368e2bd691308901d6d40cdfef7ac38f4e8b1ecf4bdab3","abstract_canon_sha256":"54bd88012a3a8960d57a3e1b3cc98f957a4094f28569a56ce1bfee3412e0a896"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:22:59.351330Z","signature_b64":"WZEXGlnb4orSjzYHtYUtvXZQqUZi1xRXUE06eomuDyJ0qZc+h0RvM/er4f5JMtCteORZNpdoT9F4oUrsXhK+Dg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"a9b64fcab88e4fd82043063dacab759204d99f2c10520b7c882ab07b68471549","last_reissued_at":"2026-05-18T00:22:59.350820Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:22:59.350820Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Mixed Membership Word Embeddings for Computational Social Science","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","cs.LG"],"primary_cat":"cs.CL","authors_text":"James Foulds","submitted_at":"2017-05-20T23:45:54Z","abstract_excerpt":"Word embeddings improve the performance of NLP systems by revealing the hidden structural relationships between words. Despite their success in many applications, word embeddings have seen very little use in computational social science NLP tasks, presumably due to their reliance on big data, and to a lack of interpretability. I propose a probabilistic model-based word embedding method which can recover interpretable embeddings, without big data. The key insight is to leverage mixed membership modeling, in which global representations are shared, but individual entities (i.e. dictionary words)"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1705.07368","kind":"arxiv","version":3},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1705.07368","created_at":"2026-05-18T00:22:59.350895+00:00"},{"alias_kind":"arxiv_version","alias_value":"1705.07368v3","created_at":"2026-05-18T00:22:59.350895+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1705.07368","created_at":"2026-05-18T00:22:59.350895+00:00"},{"alias_kind":"pith_short_12","alias_value":"VG3E7SVYRZH5","created_at":"2026-05-18T12:31:49.984773+00:00"},{"alias_kind":"pith_short_16","alias_value":"VG3E7SVYRZH5QICD","created_at":"2026-05-18T12:31:49.984773+00:00"},{"alias_kind":"pith_short_8","alias_value":"VG3E7SVY","created_at":"2026-05-18T12:31:49.984773+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/VG3E7SVYRZH5QICDAY62ZK3VSI","json":"https://pith.science/pith/VG3E7SVYRZH5QICDAY62ZK3VSI.json","graph_json":"https://pith.science/api/pith-number/VG3E7SVYRZH5QICDAY62ZK3VSI/graph.json","events_json":"https://pith.science/api/pith-number/VG3E7SVYRZH5QICDAY62ZK3VSI/events.json","paper":"https://pith.science/paper/VG3E7SVY"},"agent_actions":{"view_html":"https://pith.science/pith/VG3E7SVYRZH5QICDAY62ZK3VSI","download_json":"https://pith.science/pith/VG3E7SVYRZH5QICDAY62ZK3VSI.json","view_paper":"https://pith.science/paper/VG3E7SVY","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1705.07368&json=true","fetch_graph":"https://pith.science/api/pith-number/VG3E7SVYRZH5QICDAY62ZK3VSI/graph.json","fetch_events":"https://pith.science/api/pith-number/VG3E7SVYRZH5QICDAY62ZK3VSI/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/VG3E7SVYRZH5QICDAY62ZK3VSI/action/timestamp_anchor","attest_storage":"https://pith.science/pith/VG3E7SVYRZH5QICDAY62ZK3VSI/action/storage_attestation","attest_author":"https://pith.science/pith/VG3E7SVYRZH5QICDAY62ZK3VSI/action/author_attestation","sign_citation":"https://pith.science/pith/VG3E7SVYRZH5QICDAY62ZK3VSI/action/citation_signature","submit_replication":"https://pith.science/pith/VG3E7SVYRZH5QICDAY62ZK3VSI/action/replication_record"}},"created_at":"2026-05-18T00:22:59.350895+00:00","updated_at":"2026-05-18T00:22:59.350895+00:00"}