{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2012:CSDYI7FRPB5JW56NQH3VUYJN2J","short_pith_number":"pith:CSDYI7FR","schema_version":"1.0","canonical_sha256":"1487847cb1787a9b77cd81f75a612dd242454a72873a2f2b9c7ed8830d4d7524","source":{"kind":"arxiv","id":"1210.2179","version":3},"attestation_state":"computed","paper":{"title":"Fast Online EM for Big Topic Modeling","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.LG","authors_text":"Jia Zeng, Xiao-Qin Cao, Zhi-Qiang Liu","submitted_at":"2012-10-08T08:17:18Z","abstract_excerpt":"The expectation-maximization (EM) algorithm can compute the maximum-likelihood (ML) or maximum a posterior (MAP) point estimate of the mixture models or latent variable models such as latent Dirichlet allocation (LDA), which has been one of the most popular probabilistic topic modeling methods in the past decade. However, batch EM has high time and space complexities to learn big LDA models from big data streams. In this paper, we present a fast online EM (FOEM) algorithm that infers the topic distribution from the previously unseen documents incrementally with constant memory requirements. Wi"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1210.2179","kind":"arxiv","version":3},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2012-10-08T08:17:18Z","cross_cats_sorted":[],"title_canon_sha256":"95cf162c9d710e6244eaddb262e71a3dc99fe102e49df9847200f995b729ec6f","abstract_canon_sha256":"5cabd2ce213525b04f9eee0d8c516d4a6159261370b2aef55218da274babc323"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T01:25:16.495926Z","signature_b64":"fXz72zEP+JpuY5COoySwPQdgWh/dzwpIyO/jjP1j8kiEJC9lSWHVNggwRziYlaRpQqxPT0iWeIQtm1BuVtg8Bg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"1487847cb1787a9b77cd81f75a612dd242454a72873a2f2b9c7ed8830d4d7524","last_reissued_at":"2026-05-18T01:25:16.495392Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T01:25:16.495392Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Fast Online EM for Big Topic Modeling","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.LG","authors_text":"Jia Zeng, Xiao-Qin Cao, Zhi-Qiang Liu","submitted_at":"2012-10-08T08:17:18Z","abstract_excerpt":"The expectation-maximization (EM) algorithm can compute the maximum-likelihood (ML) or maximum a posterior (MAP) point estimate of the mixture models or latent variable models such as latent Dirichlet allocation (LDA), which has been one of the most popular probabilistic topic modeling methods in the past decade. However, batch EM has high time and space complexities to learn big LDA models from big data streams. In this paper, we present a fast online EM (FOEM) algorithm that infers the topic distribution from the previously unseen documents incrementally with constant memory requirements. Wi"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1210.2179","kind":"arxiv","version":3},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1210.2179","created_at":"2026-05-18T01:25:16.495476+00:00"},{"alias_kind":"arxiv_version","alias_value":"1210.2179v3","created_at":"2026-05-18T01:25:16.495476+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1210.2179","created_at":"2026-05-18T01:25:16.495476+00:00"},{"alias_kind":"pith_short_12","alias_value":"CSDYI7FRPB5J","created_at":"2026-05-18T12:27:01.376967+00:00"},{"alias_kind":"pith_short_16","alias_value":"CSDYI7FRPB5JW56N","created_at":"2026-05-18T12:27:01.376967+00:00"},{"alias_kind":"pith_short_8","alias_value":"CSDYI7FR","created_at":"2026-05-18T12:27:01.376967+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/CSDYI7FRPB5JW56NQH3VUYJN2J","json":"https://pith.science/pith/CSDYI7FRPB5JW56NQH3VUYJN2J.json","graph_json":"https://pith.science/api/pith-number/CSDYI7FRPB5JW56NQH3VUYJN2J/graph.json","events_json":"https://pith.science/api/pith-number/CSDYI7FRPB5JW56NQH3VUYJN2J/events.json","paper":"https://pith.science/paper/CSDYI7FR"},"agent_actions":{"view_html":"https://pith.science/pith/CSDYI7FRPB5JW56NQH3VUYJN2J","download_json":"https://pith.science/pith/CSDYI7FRPB5JW56NQH3VUYJN2J.json","view_paper":"https://pith.science/paper/CSDYI7FR","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1210.2179&json=true","fetch_graph":"https://pith.science/api/pith-number/CSDYI7FRPB5JW56NQH3VUYJN2J/graph.json","fetch_events":"https://pith.science/api/pith-number/CSDYI7FRPB5JW56NQH3VUYJN2J/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/CSDYI7FRPB5JW56NQH3VUYJN2J/action/timestamp_anchor","attest_storage":"https://pith.science/pith/CSDYI7FRPB5JW56NQH3VUYJN2J/action/storage_attestation","attest_author":"https://pith.science/pith/CSDYI7FRPB5JW56NQH3VUYJN2J/action/author_attestation","sign_citation":"https://pith.science/pith/CSDYI7FRPB5JW56NQH3VUYJN2J/action/citation_signature","submit_replication":"https://pith.science/pith/CSDYI7FRPB5JW56NQH3VUYJN2J/action/replication_record"}},"created_at":"2026-05-18T01:25:16.495476+00:00","updated_at":"2026-05-18T01:25:16.495476+00:00"}