{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2018:ZYLTATUG7I4MI2VZ7RYQQZEHNB","short_pith_number":"pith:ZYLTATUG","canonical_record":{"source":{"id":"1808.03733","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2018-08-11T01:14:50Z","cross_cats_sorted":["cs.IR","cs.LG"],"title_canon_sha256":"eaad6252cf5eef4be824f5152fd3e35da06e49d4dc5e9387114a82a353b6f34b","abstract_canon_sha256":"f9af7bb9e61f89e226fd50055c9d43806ff3399901be6ea90096202d5745b295"},"schema_version":"1.0"},"canonical_sha256":"ce17304e86fa38c46ab9fc710864876875c85e325de920537f6f5297db457c43","source":{"kind":"arxiv","id":"1808.03733","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1808.03733","created_at":"2026-05-18T00:08:14Z"},{"alias_kind":"arxiv_version","alias_value":"1808.03733v2","created_at":"2026-05-18T00:08:14Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1808.03733","created_at":"2026-05-18T00:08:14Z"},{"alias_kind":"pith_short_12","alias_value":"ZYLTATUG7I4M","created_at":"2026-05-18T12:33:07Z"},{"alias_kind":"pith_short_16","alias_value":"ZYLTATUG7I4MI2VZ","created_at":"2026-05-18T12:33:07Z"},{"alias_kind":"pith_short_8","alias_value":"ZYLTATUG","created_at":"2026-05-18T12:33:07Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2018:ZYLTATUG7I4MI2VZ7RYQQZEHNB","target":"record","payload":{"canonical_record":{"source":{"id":"1808.03733","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2018-08-11T01:14:50Z","cross_cats_sorted":["cs.IR","cs.LG"],"title_canon_sha256":"eaad6252cf5eef4be824f5152fd3e35da06e49d4dc5e9387114a82a353b6f34b","abstract_canon_sha256":"f9af7bb9e61f89e226fd50055c9d43806ff3399901be6ea90096202d5745b295"},"schema_version":"1.0"},"canonical_sha256":"ce17304e86fa38c46ab9fc710864876875c85e325de920537f6f5297db457c43","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:08:14.230534Z","signature_b64":"mdtOW9nQ1/Wh5yG3qh03DbS5fy8EPm31Z32IsTLqBcHOqkgTxmNJrinsG3r6z3efu5N2KxIN9PCm9gLID2oTCw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"ce17304e86fa38c46ab9fc710864876875c85e325de920537f6f5297db457c43","last_reissued_at":"2026-05-18T00:08:14.230156Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:08:14.230156Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1808.03733","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:08:14Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"NmVKoq21kv/xO7Eok8e22ez3aV0U+1Rrsm2zoxITuE44l4qczJneo9Hwl2aRxJfqTA2pt1DVm5mxJBkVqfbcAw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-05T08:03:34.160990Z"},"content_sha256":"780ccd80a712278347dc6fd35eea91966739551830d9c1374663bb90fe3b9e32","schema_version":"1.0","event_id":"sha256:780ccd80a712278347dc6fd35eea91966739551830d9c1374663bb90fe3b9e32"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2018:ZYLTATUG7I4MI2VZ7RYQQZEHNB","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Familia: A Configurable Topic Modeling Framework for Industrial Text Engineering","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.IR","cs.LG"],"primary_cat":"cs.CL","authors_text":"Di Jiang, Huang He, Hua Wu, Jinhua Peng, Rongzhong Lian, Siqi Bao, Yuanfeng Song","submitted_at":"2018-08-11T01:14:50Z","abstract_excerpt":"In the last decade, a variety of topic models have been proposed for text engineering. However, except Probabilistic Latent Semantic Analysis (PLSA) and Latent Dirichlet Allocation (LDA), most of existing topic models are seldom applied or considered in industrial scenarios. This phenomenon is caused by the fact that there are very few convenient tools to support these topic models so far. Intimidated by the demanding expertise and labor of designing and implementing parameter inference algorithms, software engineers are prone to simply resort to PLSA/LDA, without considering whether it is pro"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1808.03733","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:08:14Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"E//8HeHHLWTqg9mXbIMRNcL4ZUsN+Bkk8kWhAfETZfD2fznggCosYw8rNQgiIfxp+yFMHvI+uSImNoumLv8XCw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-05T08:03:34.161399Z"},"content_sha256":"5ba48e6a8db1372d01966ca53ca378c6933e04c0109394cfe00787a83cf22e44","schema_version":"1.0","event_id":"sha256:5ba48e6a8db1372d01966ca53ca378c6933e04c0109394cfe00787a83cf22e44"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/ZYLTATUG7I4MI2VZ7RYQQZEHNB/bundle.json","state_url":"https://pith.science/pith/ZYLTATUG7I4MI2VZ7RYQQZEHNB/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/ZYLTATUG7I4MI2VZ7RYQQZEHNB/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-05T08:03:34Z","links":{"resolver":"https://pith.science/pith/ZYLTATUG7I4MI2VZ7RYQQZEHNB","bundle":"https://pith.science/pith/ZYLTATUG7I4MI2VZ7RYQQZEHNB/bundle.json","state":"https://pith.science/pith/ZYLTATUG7I4MI2VZ7RYQQZEHNB/state.json","well_known_bundle":"https://pith.science/.well-known/pith/ZYLTATUG7I4MI2VZ7RYQQZEHNB/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2018:ZYLTATUG7I4MI2VZ7RYQQZEHNB","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"f9af7bb9e61f89e226fd50055c9d43806ff3399901be6ea90096202d5745b295","cross_cats_sorted":["cs.IR","cs.LG"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2018-08-11T01:14:50Z","title_canon_sha256":"eaad6252cf5eef4be824f5152fd3e35da06e49d4dc5e9387114a82a353b6f34b"},"schema_version":"1.0","source":{"id":"1808.03733","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1808.03733","created_at":"2026-05-18T00:08:14Z"},{"alias_kind":"arxiv_version","alias_value":"1808.03733v2","created_at":"2026-05-18T00:08:14Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1808.03733","created_at":"2026-05-18T00:08:14Z"},{"alias_kind":"pith_short_12","alias_value":"ZYLTATUG7I4M","created_at":"2026-05-18T12:33:07Z"},{"alias_kind":"pith_short_16","alias_value":"ZYLTATUG7I4MI2VZ","created_at":"2026-05-18T12:33:07Z"},{"alias_kind":"pith_short_8","alias_value":"ZYLTATUG","created_at":"2026-05-18T12:33:07Z"}],"graph_snapshots":[{"event_id":"sha256:5ba48e6a8db1372d01966ca53ca378c6933e04c0109394cfe00787a83cf22e44","target":"graph","created_at":"2026-05-18T00:08:14Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"In the last decade, a variety of topic models have been proposed for text engineering. However, except Probabilistic Latent Semantic Analysis (PLSA) and Latent Dirichlet Allocation (LDA), most of existing topic models are seldom applied or considered in industrial scenarios. This phenomenon is caused by the fact that there are very few convenient tools to support these topic models so far. Intimidated by the demanding expertise and labor of designing and implementing parameter inference algorithms, software engineers are prone to simply resort to PLSA/LDA, without considering whether it is pro","authors_text":"Di Jiang, Huang He, Hua Wu, Jinhua Peng, Rongzhong Lian, Siqi Bao, Yuanfeng Song","cross_cats":["cs.IR","cs.LG"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2018-08-11T01:14:50Z","title":"Familia: A Configurable Topic Modeling Framework for Industrial Text Engineering"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1808.03733","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:780ccd80a712278347dc6fd35eea91966739551830d9c1374663bb90fe3b9e32","target":"record","created_at":"2026-05-18T00:08:14Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"f9af7bb9e61f89e226fd50055c9d43806ff3399901be6ea90096202d5745b295","cross_cats_sorted":["cs.IR","cs.LG"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2018-08-11T01:14:50Z","title_canon_sha256":"eaad6252cf5eef4be824f5152fd3e35da06e49d4dc5e9387114a82a353b6f34b"},"schema_version":"1.0","source":{"id":"1808.03733","kind":"arxiv","version":2}},"canonical_sha256":"ce17304e86fa38c46ab9fc710864876875c85e325de920537f6f5297db457c43","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"ce17304e86fa38c46ab9fc710864876875c85e325de920537f6f5297db457c43","first_computed_at":"2026-05-18T00:08:14.230156Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:08:14.230156Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"mdtOW9nQ1/Wh5yG3qh03DbS5fy8EPm31Z32IsTLqBcHOqkgTxmNJrinsG3r6z3efu5N2KxIN9PCm9gLID2oTCw==","signature_status":"signed_v1","signed_at":"2026-05-18T00:08:14.230534Z","signed_message":"canonical_sha256_bytes"},"source_id":"1808.03733","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:780ccd80a712278347dc6fd35eea91966739551830d9c1374663bb90fe3b9e32","sha256:5ba48e6a8db1372d01966ca53ca378c6933e04c0109394cfe00787a83cf22e44"],"state_sha256":"cb75fcaf8dd657911804800418069f61020fcc2395310112a6167c3185b22d5e"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"OEYHRjhrHQ11wqt8vkLOMcwFepmCWXoHdfJNhIsq4Y8hpkN74l0RRnfAMyHyIojThM0vwIssqZkd80SaMj+7Aw==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-05T08:03:34.163907Z","bundle_sha256":"0595eddcafeb2da954d01179ef6a9010d67024237bf937e3a7ea6840737006af"}}