{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2015:WVXTDVCSIAUHGTYM4ZEBIV426E","short_pith_number":"pith:WVXTDVCS","schema_version":"1.0","canonical_sha256":"b56f31d4524028734f0ce64814579af11ef9260a425439978de52df8c141db34","source":{"kind":"arxiv","id":"1505.02065","version":6},"attestation_state":"computed","paper":{"title":"Dense Distributions from Sparse Samples: Improved Gibbs Sampling Parameter Estimators for LDA","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"stat.ML","authors_text":"Grigorios Tsoumakas, James R. Foulds, Timothy N. Rubin, Yannis Papanikolaou","submitted_at":"2015-05-08T15:32:43Z","abstract_excerpt":"We introduce a novel approach for estimating Latent Dirichlet Allocation (LDA) parameters from collapsed Gibbs samples (CGS), by leveraging the full conditional distributions over the latent variable assignments to efficiently average over multiple samples, for little more computational cost than drawing a single additional collapsed Gibbs sample. Our approach can be understood as adapting the soft clustering methodology of Collapsed Variational Bayes (CVB0) to CGS parameter estimation, in order to get the best of both techniques. Our estimators can straightforwardly be applied to the output o"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1505.02065","kind":"arxiv","version":6},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"stat.ML","submitted_at":"2015-05-08T15:32:43Z","cross_cats_sorted":[],"title_canon_sha256":"888a99130c753cb862401dc6bdac26907e62d514463bc0907d57cd586edd61cd","abstract_canon_sha256":"8892f9c945681c1ae88291b35062e8cf9a0865fbad6a44d7a842b494d6475cdf"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:46:43.252217Z","signature_b64":"8qEm4bU7Klw3WO6nkRqMPX+jSh093P5uoZLPmllUUl6eXYV4Oig19TPqrRJGIEEmHlacDmQgO04tm58X/bopBQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"b56f31d4524028734f0ce64814579af11ef9260a425439978de52df8c141db34","last_reissued_at":"2026-05-18T00:46:43.251451Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:46:43.251451Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Dense Distributions from Sparse Samples: Improved Gibbs Sampling Parameter Estimators for LDA","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"stat.ML","authors_text":"Grigorios Tsoumakas, James R. Foulds, Timothy N. Rubin, Yannis Papanikolaou","submitted_at":"2015-05-08T15:32:43Z","abstract_excerpt":"We introduce a novel approach for estimating Latent Dirichlet Allocation (LDA) parameters from collapsed Gibbs samples (CGS), by leveraging the full conditional distributions over the latent variable assignments to efficiently average over multiple samples, for little more computational cost than drawing a single additional collapsed Gibbs sample. Our approach can be understood as adapting the soft clustering methodology of Collapsed Variational Bayes (CVB0) to CGS parameter estimation, in order to get the best of both techniques. Our estimators can straightforwardly be applied to the output o"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1505.02065","kind":"arxiv","version":6},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1505.02065","created_at":"2026-05-18T00:46:43.251561+00:00"},{"alias_kind":"arxiv_version","alias_value":"1505.02065v6","created_at":"2026-05-18T00:46:43.251561+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1505.02065","created_at":"2026-05-18T00:46:43.251561+00:00"},{"alias_kind":"pith_short_12","alias_value":"WVXTDVCSIAUH","created_at":"2026-05-18T12:29:47.479230+00:00"},{"alias_kind":"pith_short_16","alias_value":"WVXTDVCSIAUHGTYM","created_at":"2026-05-18T12:29:47.479230+00:00"},{"alias_kind":"pith_short_8","alias_value":"WVXTDVCS","created_at":"2026-05-18T12:29:47.479230+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/WVXTDVCSIAUHGTYM4ZEBIV426E","json":"https://pith.science/pith/WVXTDVCSIAUHGTYM4ZEBIV426E.json","graph_json":"https://pith.science/api/pith-number/WVXTDVCSIAUHGTYM4ZEBIV426E/graph.json","events_json":"https://pith.science/api/pith-number/WVXTDVCSIAUHGTYM4ZEBIV426E/events.json","paper":"https://pith.science/paper/WVXTDVCS"},"agent_actions":{"view_html":"https://pith.science/pith/WVXTDVCSIAUHGTYM4ZEBIV426E","download_json":"https://pith.science/pith/WVXTDVCSIAUHGTYM4ZEBIV426E.json","view_paper":"https://pith.science/paper/WVXTDVCS","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1505.02065&json=true","fetch_graph":"https://pith.science/api/pith-number/WVXTDVCSIAUHGTYM4ZEBIV426E/graph.json","fetch_events":"https://pith.science/api/pith-number/WVXTDVCSIAUHGTYM4ZEBIV426E/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/WVXTDVCSIAUHGTYM4ZEBIV426E/action/timestamp_anchor","attest_storage":"https://pith.science/pith/WVXTDVCSIAUHGTYM4ZEBIV426E/action/storage_attestation","attest_author":"https://pith.science/pith/WVXTDVCSIAUHGTYM4ZEBIV426E/action/author_attestation","sign_citation":"https://pith.science/pith/WVXTDVCSIAUHGTYM4ZEBIV426E/action/citation_signature","submit_replication":"https://pith.science/pith/WVXTDVCSIAUHGTYM4ZEBIV426E/action/replication_record"}},"created_at":"2026-05-18T00:46:43.251561+00:00","updated_at":"2026-05-18T00:46:43.251561+00:00"}