{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2014:GYHULLB5VWGB6RMSHMBZ2OUDU2","short_pith_number":"pith:GYHULLB5","schema_version":"1.0","canonical_sha256":"360f45ac3dad8c1f45923b039d3a83a6989d139966341357e1fe321ed87f08d9","source":{"kind":"arxiv","id":"1402.0422","version":1},"attestation_state":"computed","paper":{"title":"A high-reproducibility and high-accuracy method for automated topic classification","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.IR","cs.LG","physics.soc-ph"],"primary_cat":"stat.ML","authors_text":"Andrea Lancichinetti, Daniel Acuna, Jane X. Wang, Konrad K\\\"ording, Lu\\'is A. Nunes Amaral, M. Irmak Sirer","submitted_at":"2014-02-03T16:45:13Z","abstract_excerpt":"Much of human knowledge sits in large databases of unstructured text. Leveraging this knowledge requires algorithms that extract and record metadata on unstructured text documents. Assigning topics to documents will enable intelligent search, statistical characterization, and meaningful classification. Latent Dirichlet allocation (LDA) is the state-of-the-art in topic classification. Here, we perform a systematic theoretical and numerical analysis that demonstrates that current optimization techniques for LDA often yield results which are not accurate in inferring the most suitable model param"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1402.0422","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"stat.ML","submitted_at":"2014-02-03T16:45:13Z","cross_cats_sorted":["cs.IR","cs.LG","physics.soc-ph"],"title_canon_sha256":"23efd2e99553ee2b712e94fdcbf18f3e9d06b4f4887b5c577f67fc4ac02f439c","abstract_canon_sha256":"17963812a8a07911cfffc789ee9eeea5ba4ee191a3ac926e8a52f3bb7fc41b20"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T03:00:15.347194Z","signature_b64":"btKSDQ5n3Tz97pUzj5v/C1bN5yh0NHyCPWfNNXrxKS3aYZjTr32MCk8dPnRt7S71bKO4Qj84u4FFJLl2fADlDQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"360f45ac3dad8c1f45923b039d3a83a6989d139966341357e1fe321ed87f08d9","last_reissued_at":"2026-05-18T03:00:15.346495Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T03:00:15.346495Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"A high-reproducibility and high-accuracy method for automated topic classification","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.IR","cs.LG","physics.soc-ph"],"primary_cat":"stat.ML","authors_text":"Andrea Lancichinetti, Daniel Acuna, Jane X. Wang, Konrad K\\\"ording, Lu\\'is A. Nunes Amaral, M. Irmak Sirer","submitted_at":"2014-02-03T16:45:13Z","abstract_excerpt":"Much of human knowledge sits in large databases of unstructured text. Leveraging this knowledge requires algorithms that extract and record metadata on unstructured text documents. Assigning topics to documents will enable intelligent search, statistical characterization, and meaningful classification. Latent Dirichlet allocation (LDA) is the state-of-the-art in topic classification. Here, we perform a systematic theoretical and numerical analysis that demonstrates that current optimization techniques for LDA often yield results which are not accurate in inferring the most suitable model param"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1402.0422","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1402.0422","created_at":"2026-05-18T03:00:15.346604+00:00"},{"alias_kind":"arxiv_version","alias_value":"1402.0422v1","created_at":"2026-05-18T03:00:15.346604+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1402.0422","created_at":"2026-05-18T03:00:15.346604+00:00"},{"alias_kind":"pith_short_12","alias_value":"GYHULLB5VWGB","created_at":"2026-05-18T12:28:30.664211+00:00"},{"alias_kind":"pith_short_16","alias_value":"GYHULLB5VWGB6RMS","created_at":"2026-05-18T12:28:30.664211+00:00"},{"alias_kind":"pith_short_8","alias_value":"GYHULLB5","created_at":"2026-05-18T12:28:30.664211+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/GYHULLB5VWGB6RMSHMBZ2OUDU2","json":"https://pith.science/pith/GYHULLB5VWGB6RMSHMBZ2OUDU2.json","graph_json":"https://pith.science/api/pith-number/GYHULLB5VWGB6RMSHMBZ2OUDU2/graph.json","events_json":"https://pith.science/api/pith-number/GYHULLB5VWGB6RMSHMBZ2OUDU2/events.json","paper":"https://pith.science/paper/GYHULLB5"},"agent_actions":{"view_html":"https://pith.science/pith/GYHULLB5VWGB6RMSHMBZ2OUDU2","download_json":"https://pith.science/pith/GYHULLB5VWGB6RMSHMBZ2OUDU2.json","view_paper":"https://pith.science/paper/GYHULLB5","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1402.0422&json=true","fetch_graph":"https://pith.science/api/pith-number/GYHULLB5VWGB6RMSHMBZ2OUDU2/graph.json","fetch_events":"https://pith.science/api/pith-number/GYHULLB5VWGB6RMSHMBZ2OUDU2/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/GYHULLB5VWGB6RMSHMBZ2OUDU2/action/timestamp_anchor","attest_storage":"https://pith.science/pith/GYHULLB5VWGB6RMSHMBZ2OUDU2/action/storage_attestation","attest_author":"https://pith.science/pith/GYHULLB5VWGB6RMSHMBZ2OUDU2/action/author_attestation","sign_citation":"https://pith.science/pith/GYHULLB5VWGB6RMSHMBZ2OUDU2/action/citation_signature","submit_replication":"https://pith.science/pith/GYHULLB5VWGB6RMSHMBZ2OUDU2/action/replication_record"}},"created_at":"2026-05-18T03:00:15.346604+00:00","updated_at":"2026-05-18T03:00:15.346604+00:00"}