{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2014:BHQSNPCJOGYILHWB32IBIANBA5","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"0c8027140b1dadfe9425a679071bef4ab4d389d0fce294a754d7d328639e7de3","cross_cats_sorted":["cs.IR","cs.LG"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2014-06-24T17:10:29Z","title_canon_sha256":"05a57a1c48d6ef01807695d5f538348d76628126d96d2b1094e90509ca40aaa9"},"schema_version":"1.0","source":{"id":"1406.6312","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1406.6312","created_at":"2026-05-18T02:34:44Z"},{"alias_kind":"arxiv_version","alias_value":"1406.6312v2","created_at":"2026-05-18T02:34:44Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1406.6312","created_at":"2026-05-18T02:34:44Z"},{"alias_kind":"pith_short_12","alias_value":"BHQSNPCJOGYI","created_at":"2026-05-18T12:28:22Z"},{"alias_kind":"pith_short_16","alias_value":"BHQSNPCJOGYILHWB","created_at":"2026-05-18T12:28:22Z"},{"alias_kind":"pith_short_8","alias_value":"BHQSNPCJ","created_at":"2026-05-18T12:28:22Z"}],"graph_snapshots":[{"event_id":"sha256:e1b780054ae9eab744aaf812783539580050c387b1480e0d50d29beda535b9e0","target":"graph","created_at":"2026-05-18T02:34:44Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"While most topic modeling algorithms model text corpora with unigrams, human interpretation often relies on inherent grouping of terms into phrases. As such, we consider the problem of discovering topical phrases of mixed lengths. Existing work either performs post processing to the inference results of unigram-based topic models, or utilizes complex n-gram-discovery topic models. These methods generally produce low-quality topical phrases or suffer from poor scalability on even moderately-sized datasets. We propose a different approach that is both computationally efficient and effective. Our","authors_text":"Ahmed El-Kishky, Chi Wang, Clare Voss, Jiawei Han, Yanglei Song","cross_cats":["cs.IR","cs.LG"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2014-06-24T17:10:29Z","title":"Scalable Topical Phrase Mining from Text Corpora"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1406.6312","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:8e2dde38b533c321ff83b34a03059b1bb42b7afe287b148b442922738d6b44ef","target":"record","created_at":"2026-05-18T02:34:44Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"0c8027140b1dadfe9425a679071bef4ab4d389d0fce294a754d7d328639e7de3","cross_cats_sorted":["cs.IR","cs.LG"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2014-06-24T17:10:29Z","title_canon_sha256":"05a57a1c48d6ef01807695d5f538348d76628126d96d2b1094e90509ca40aaa9"},"schema_version":"1.0","source":{"id":"1406.6312","kind":"arxiv","version":2}},"canonical_sha256":"09e126bc4971b0859ec1de901401a107685466985e858ffda2b5a162b3ab6fcb","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"09e126bc4971b0859ec1de901401a107685466985e858ffda2b5a162b3ab6fcb","first_computed_at":"2026-05-18T02:34:44.853854Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T02:34:44.853854Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"byAlFEOjRI8CqQ5lCey93Jr/iJdWGXON6A9LfvBi+PFNKfI9j29A2zs4ZuxyPGRC27SQ7RehHx+aVO7PeXZbAw==","signature_status":"signed_v1","signed_at":"2026-05-18T02:34:44.854213Z","signed_message":"canonical_sha256_bytes"},"source_id":"1406.6312","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:8e2dde38b533c321ff83b34a03059b1bb42b7afe287b148b442922738d6b44ef","sha256:e1b780054ae9eab744aaf812783539580050c387b1480e0d50d29beda535b9e0"],"state_sha256":"ad6e585dde48bc2bb99cb2021839bd6df193ea52036d87280fd8b8a68542508d"}