{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2011:IUM3SKSMF7UYSGEFHM2TLHPB7T","short_pith_number":"pith:IUM3SKSM","schema_version":"1.0","canonical_sha256":"4519b92a4c2fe98918853b35359de1fcc49693aec1ecba4383ca69ad9b0e6d36","source":{"kind":"arxiv","id":"1107.3765","version":1},"attestation_state":"computed","paper":{"title":"Using Variational Inference and MapReduce to Scale Topic Modeling","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.DC"],"primary_cat":"cs.AI","authors_text":"Jordan Boyd-Graber, Ke Zhai, Nima Asadi","submitted_at":"2011-07-19T16:32:22Z","abstract_excerpt":"Latent Dirichlet Allocation (LDA) is a popular topic modeling technique for exploring document collections. Because of the increasing prevalence of large datasets, there is a need to improve the scalability of inference of LDA. In this paper, we propose a technique called ~\\emph{MapReduce LDA} (Mr. LDA) to accommodate very large corpus collections in the MapReduce framework. In contrast to other techniques to scale inference for LDA, which use Gibbs sampling, we use variational inference. Our solution efficiently distributes computation and is relatively simple to implement. More importantly, "},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1107.3765","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2011-07-19T16:32:22Z","cross_cats_sorted":["cs.DC"],"title_canon_sha256":"347c16a48beffc7b2e9435231480599cf453fd7e66c7259a0226a27ee7511631","abstract_canon_sha256":"a81fe3af9005b3aa5e699d5770397eba286666a261da9903e25c4522c29495bd"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T04:17:16.033275Z","signature_b64":"zLR2DJvOvqORurR8I0R9CsX3WNparuHP2H22x+xqaZWzkcH9e/SgoeJyzRCMzd5+VkSB4gakCuuswcMWs4rHDw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"4519b92a4c2fe98918853b35359de1fcc49693aec1ecba4383ca69ad9b0e6d36","last_reissued_at":"2026-05-18T04:17:16.032620Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T04:17:16.032620Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Using Variational Inference and MapReduce to Scale Topic Modeling","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.DC"],"primary_cat":"cs.AI","authors_text":"Jordan Boyd-Graber, Ke Zhai, Nima Asadi","submitted_at":"2011-07-19T16:32:22Z","abstract_excerpt":"Latent Dirichlet Allocation (LDA) is a popular topic modeling technique for exploring document collections. Because of the increasing prevalence of large datasets, there is a need to improve the scalability of inference of LDA. In this paper, we propose a technique called ~\\emph{MapReduce LDA} (Mr. LDA) to accommodate very large corpus collections in the MapReduce framework. In contrast to other techniques to scale inference for LDA, which use Gibbs sampling, we use variational inference. Our solution efficiently distributes computation and is relatively simple to implement. More importantly, "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1107.3765","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1107.3765","created_at":"2026-05-18T04:17:16.032732+00:00"},{"alias_kind":"arxiv_version","alias_value":"1107.3765v1","created_at":"2026-05-18T04:17:16.032732+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1107.3765","created_at":"2026-05-18T04:17:16.032732+00:00"},{"alias_kind":"pith_short_12","alias_value":"IUM3SKSMF7UY","created_at":"2026-05-18T12:26:32.869790+00:00"},{"alias_kind":"pith_short_16","alias_value":"IUM3SKSMF7UYSGEF","created_at":"2026-05-18T12:26:32.869790+00:00"},{"alias_kind":"pith_short_8","alias_value":"IUM3SKSM","created_at":"2026-05-18T12:26:32.869790+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/IUM3SKSMF7UYSGEFHM2TLHPB7T","json":"https://pith.science/pith/IUM3SKSMF7UYSGEFHM2TLHPB7T.json","graph_json":"https://pith.science/api/pith-number/IUM3SKSMF7UYSGEFHM2TLHPB7T/graph.json","events_json":"https://pith.science/api/pith-number/IUM3SKSMF7UYSGEFHM2TLHPB7T/events.json","paper":"https://pith.science/paper/IUM3SKSM"},"agent_actions":{"view_html":"https://pith.science/pith/IUM3SKSMF7UYSGEFHM2TLHPB7T","download_json":"https://pith.science/pith/IUM3SKSMF7UYSGEFHM2TLHPB7T.json","view_paper":"https://pith.science/paper/IUM3SKSM","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1107.3765&json=true","fetch_graph":"https://pith.science/api/pith-number/IUM3SKSMF7UYSGEFHM2TLHPB7T/graph.json","fetch_events":"https://pith.science/api/pith-number/IUM3SKSMF7UYSGEFHM2TLHPB7T/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/IUM3SKSMF7UYSGEFHM2TLHPB7T/action/timestamp_anchor","attest_storage":"https://pith.science/pith/IUM3SKSMF7UYSGEFHM2TLHPB7T/action/storage_attestation","attest_author":"https://pith.science/pith/IUM3SKSMF7UYSGEFHM2TLHPB7T/action/author_attestation","sign_citation":"https://pith.science/pith/IUM3SKSMF7UYSGEFHM2TLHPB7T/action/citation_signature","submit_replication":"https://pith.science/pith/IUM3SKSMF7UYSGEFHM2TLHPB7T/action/replication_record"}},"created_at":"2026-05-18T04:17:16.032732+00:00","updated_at":"2026-05-18T04:17:16.032732+00:00"}