{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2014:24AITJNEF6A347XYZHR4VMGPFW","short_pith_number":"pith:24AITJNE","canonical_record":{"source":{"id":"1410.6466","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"stat.ML","submitted_at":"2014-10-23T19:38:44Z","cross_cats_sorted":["cs.IR","cs.LG","stat.CO"],"title_canon_sha256":"26bdd726e38e4b9f1d98b07beaf327d378b90fec625e1bde6a9f9afbf351fdce","abstract_canon_sha256":"860a2e7e687c49300a96f9a114609459823975944abdd3cfcfa42c9952613bad"},"schema_version":"1.0"},"canonical_sha256":"d70089a5a42f81be7ef8c9e3cab0cf2d8c46d73ff96ca21b04ee1b8d7efff97f","source":{"kind":"arxiv","id":"1410.6466","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1410.6466","created_at":"2026-05-18T02:26:58Z"},{"alias_kind":"arxiv_version","alias_value":"1410.6466v2","created_at":"2026-05-18T02:26:58Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1410.6466","created_at":"2026-05-18T02:26:58Z"},{"alias_kind":"pith_short_12","alias_value":"24AITJNEF6A3","created_at":"2026-05-18T12:28:09Z"},{"alias_kind":"pith_short_16","alias_value":"24AITJNEF6A347XY","created_at":"2026-05-18T12:28:09Z"},{"alias_kind":"pith_short_8","alias_value":"24AITJNE","created_at":"2026-05-18T12:28:09Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2014:24AITJNEF6A347XYZHR4VMGPFW","target":"record","payload":{"canonical_record":{"source":{"id":"1410.6466","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"stat.ML","submitted_at":"2014-10-23T19:38:44Z","cross_cats_sorted":["cs.IR","cs.LG","stat.CO"],"title_canon_sha256":"26bdd726e38e4b9f1d98b07beaf327d378b90fec625e1bde6a9f9afbf351fdce","abstract_canon_sha256":"860a2e7e687c49300a96f9a114609459823975944abdd3cfcfa42c9952613bad"},"schema_version":"1.0"},"canonical_sha256":"d70089a5a42f81be7ef8c9e3cab0cf2d8c46d73ff96ca21b04ee1b8d7efff97f","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T02:26:58.724232Z","signature_b64":"gmiew/e5p1+CHJRBHjzTRME8dRi4HIWDr+OhdBtOqtCjHjZHpHtCqqM9J1QOebAJvWkwofBwzijQOLC63TtHBg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"d70089a5a42f81be7ef8c9e3cab0cf2d8c46d73ff96ca21b04ee1b8d7efff97f","last_reissued_at":"2026-05-18T02:26:58.723857Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T02:26:58.723857Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1410.6466","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T02:26:58Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"9dHhcj7MDyO71FGQSZONtE/DE88hdTmNmjdJoTygC0OzVXywNOX0Un/x2cF3VfJ6bwDW5qOSE/UZu5J/u27JAA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-19T02:21:06.118576Z"},"content_sha256":"fafa0578f443cb000d3ac58dcc6c801a38b7870bbd83a45bc87e24f8d8d4f04b","schema_version":"1.0","event_id":"sha256:fafa0578f443cb000d3ac58dcc6c801a38b7870bbd83a45bc87e24f8d8d4f04b"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2014:24AITJNEF6A347XYZHR4VMGPFW","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Model Selection for Topic Models via Spectral Decomposition","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.IR","cs.LG","stat.CO"],"primary_cat":"stat.ML","authors_text":"Dehua Cheng, Xinran He, Yan Liu","submitted_at":"2014-10-23T19:38:44Z","abstract_excerpt":"Topic models have achieved significant successes in analyzing large-scale text corpus. In practical applications, we are always confronted with the challenge of model selection, i.e., how to appropriately set the number of topics. Following recent advances in topic model inference via tensor decomposition, we make a first attempt to provide theoretical analysis on model selection in latent Dirichlet allocation. Under mild conditions, we derive the upper bound and lower bound on the number of topics given a text collection of finite size. Experimental results demonstrate that our bounds are acc"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1410.6466","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T02:26:58Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"MuQdU68ZPd+sRbEXiwC3Y1lAYYXBo9Xn/rCHCbIUEyPorL7D81Flm+BW3dZQRXXMDn6XpwxMGuaM1ssABD4VBQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-19T02:21:06.119139Z"},"content_sha256":"ae9d26e96ffb5e58ca187a22f035db9fef084cdc9100b77ba7fbb0380b669c06","schema_version":"1.0","event_id":"sha256:ae9d26e96ffb5e58ca187a22f035db9fef084cdc9100b77ba7fbb0380b669c06"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/24AITJNEF6A347XYZHR4VMGPFW/bundle.json","state_url":"https://pith.science/pith/24AITJNEF6A347XYZHR4VMGPFW/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/24AITJNEF6A347XYZHR4VMGPFW/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-19T02:21:06Z","links":{"resolver":"https://pith.science/pith/24AITJNEF6A347XYZHR4VMGPFW","bundle":"https://pith.science/pith/24AITJNEF6A347XYZHR4VMGPFW/bundle.json","state":"https://pith.science/pith/24AITJNEF6A347XYZHR4VMGPFW/state.json","well_known_bundle":"https://pith.science/.well-known/pith/24AITJNEF6A347XYZHR4VMGPFW/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2014:24AITJNEF6A347XYZHR4VMGPFW","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"860a2e7e687c49300a96f9a114609459823975944abdd3cfcfa42c9952613bad","cross_cats_sorted":["cs.IR","cs.LG","stat.CO"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"stat.ML","submitted_at":"2014-10-23T19:38:44Z","title_canon_sha256":"26bdd726e38e4b9f1d98b07beaf327d378b90fec625e1bde6a9f9afbf351fdce"},"schema_version":"1.0","source":{"id":"1410.6466","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1410.6466","created_at":"2026-05-18T02:26:58Z"},{"alias_kind":"arxiv_version","alias_value":"1410.6466v2","created_at":"2026-05-18T02:26:58Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1410.6466","created_at":"2026-05-18T02:26:58Z"},{"alias_kind":"pith_short_12","alias_value":"24AITJNEF6A3","created_at":"2026-05-18T12:28:09Z"},{"alias_kind":"pith_short_16","alias_value":"24AITJNEF6A347XY","created_at":"2026-05-18T12:28:09Z"},{"alias_kind":"pith_short_8","alias_value":"24AITJNE","created_at":"2026-05-18T12:28:09Z"}],"graph_snapshots":[{"event_id":"sha256:ae9d26e96ffb5e58ca187a22f035db9fef084cdc9100b77ba7fbb0380b669c06","target":"graph","created_at":"2026-05-18T02:26:58Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Topic models have achieved significant successes in analyzing large-scale text corpus. In practical applications, we are always confronted with the challenge of model selection, i.e., how to appropriately set the number of topics. Following recent advances in topic model inference via tensor decomposition, we make a first attempt to provide theoretical analysis on model selection in latent Dirichlet allocation. Under mild conditions, we derive the upper bound and lower bound on the number of topics given a text collection of finite size. Experimental results demonstrate that our bounds are acc","authors_text":"Dehua Cheng, Xinran He, Yan Liu","cross_cats":["cs.IR","cs.LG","stat.CO"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"stat.ML","submitted_at":"2014-10-23T19:38:44Z","title":"Model Selection for Topic Models via Spectral Decomposition"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1410.6466","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:fafa0578f443cb000d3ac58dcc6c801a38b7870bbd83a45bc87e24f8d8d4f04b","target":"record","created_at":"2026-05-18T02:26:58Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"860a2e7e687c49300a96f9a114609459823975944abdd3cfcfa42c9952613bad","cross_cats_sorted":["cs.IR","cs.LG","stat.CO"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"stat.ML","submitted_at":"2014-10-23T19:38:44Z","title_canon_sha256":"26bdd726e38e4b9f1d98b07beaf327d378b90fec625e1bde6a9f9afbf351fdce"},"schema_version":"1.0","source":{"id":"1410.6466","kind":"arxiv","version":2}},"canonical_sha256":"d70089a5a42f81be7ef8c9e3cab0cf2d8c46d73ff96ca21b04ee1b8d7efff97f","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"d70089a5a42f81be7ef8c9e3cab0cf2d8c46d73ff96ca21b04ee1b8d7efff97f","first_computed_at":"2026-05-18T02:26:58.723857Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T02:26:58.723857Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"gmiew/e5p1+CHJRBHjzTRME8dRi4HIWDr+OhdBtOqtCjHjZHpHtCqqM9J1QOebAJvWkwofBwzijQOLC63TtHBg==","signature_status":"signed_v1","signed_at":"2026-05-18T02:26:58.724232Z","signed_message":"canonical_sha256_bytes"},"source_id":"1410.6466","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:fafa0578f443cb000d3ac58dcc6c801a38b7870bbd83a45bc87e24f8d8d4f04b","sha256:ae9d26e96ffb5e58ca187a22f035db9fef084cdc9100b77ba7fbb0380b669c06"],"state_sha256":"aab91b06f5d87b31c465ce9d23f29538ac7f35bb21845611d3acd30a99c50dc4"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"5jRP4tGoVpmHwbVXLZImWBjvNbmsBIeNyAnwqEd3eV1+LppZqcVFcoXvAFj4uTbDVRiy4UF7dQdikKFy+pL5Bw==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-19T02:21:06.120974Z","bundle_sha256":"73bf5dba431e48455a10e058c80787a22f9294d1347d1d390e1917e1fe127c11"}}