{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2018:POOL6DU7VYLENK2VFW7JAOBZ3A","short_pith_number":"pith:POOL6DU7","canonical_record":{"source":{"id":"1811.12500","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-11-29T21:39:44Z","cross_cats_sorted":["cs.IR","stat.ML"],"title_canon_sha256":"81a393d056506017c238d812bb3afac9ae419b554a6e87783d60098f615805f3","abstract_canon_sha256":"0042b4032617c5bf530f4df3e1e4abbc1f92b1b8914bebeccbd0f84b8e84f6a7"},"schema_version":"1.0"},"canonical_sha256":"7b9cbf0e9fae1646ab552dbe903839d82bcda521d54285ac0a86f3fc04900aa3","source":{"kind":"arxiv","id":"1811.12500","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1811.12500","created_at":"2026-05-17T23:59:29Z"},{"alias_kind":"arxiv_version","alias_value":"1811.12500v1","created_at":"2026-05-17T23:59:29Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1811.12500","created_at":"2026-05-17T23:59:29Z"},{"alias_kind":"pith_short_12","alias_value":"POOL6DU7VYLE","created_at":"2026-05-18T12:32:46Z"},{"alias_kind":"pith_short_16","alias_value":"POOL6DU7VYLENK2V","created_at":"2026-05-18T12:32:46Z"},{"alias_kind":"pith_short_8","alias_value":"POOL6DU7","created_at":"2026-05-18T12:32:46Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2018:POOL6DU7VYLENK2VFW7JAOBZ3A","target":"record","payload":{"canonical_record":{"source":{"id":"1811.12500","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-11-29T21:39:44Z","cross_cats_sorted":["cs.IR","stat.ML"],"title_canon_sha256":"81a393d056506017c238d812bb3afac9ae419b554a6e87783d60098f615805f3","abstract_canon_sha256":"0042b4032617c5bf530f4df3e1e4abbc1f92b1b8914bebeccbd0f84b8e84f6a7"},"schema_version":"1.0"},"canonical_sha256":"7b9cbf0e9fae1646ab552dbe903839d82bcda521d54285ac0a86f3fc04900aa3","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:59:29.450313Z","signature_b64":"1vBSOlITMGOohb/zGrFz/xl0ez/K39zkY3xEpITLPZsDofgSL82wbzKHTWgyVCLt0UCI9vJbZiRDCcqFPAieBA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"7b9cbf0e9fae1646ab552dbe903839d82bcda521d54285ac0a86f3fc04900aa3","last_reissued_at":"2026-05-17T23:59:29.449809Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:59:29.449809Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1811.12500","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:59:29Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"HVub/xflKHgVDvfm9saYtUKCxD6S7m1geye0fU+O5xsn71ZodR4vACYXJg/50fi/1904XsMK8/NWkthqq3rHBA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-12T05:23:52.746721Z"},"content_sha256":"46f3f053c0e3f0a4f171a92052605c0f7aa01c93695a7ea85a2717ac8e01a295","schema_version":"1.0","event_id":"sha256:46f3f053c0e3f0a4f171a92052605c0f7aa01c93695a7ea85a2717ac8e01a295"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2018:POOL6DU7VYLENK2VFW7JAOBZ3A","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Sequential Embedding Induced Text Clustering, a Non-parametric Bayesian Approach","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.IR","stat.ML"],"primary_cat":"cs.LG","authors_text":"Qi Lou, Sargur N. Srihari, Tiehang Duan, Xiaohui Xie","submitted_at":"2018-11-29T21:39:44Z","abstract_excerpt":"Current state-of-the-art nonparametric Bayesian text clustering methods model documents through multinomial distribution on bags of words. Although these methods can effectively utilize the word burstiness representation of documents and achieve decent performance, they do not explore the sequential information of text and relationships among synonyms. In this paper, the documents are modeled as the joint of bags of words, sequential features and word embeddings. We proposed Sequential Embedding induced Dirichlet Process Mixture Model (SiDPMM) to effectively exploit this joint document represe"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1811.12500","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:59:29Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"NpiIPz+KZouaEJBc+4Byywf7B71Shjhb3PL/RkzH6YN/8G/dCvLLCQLmEhGcv57UOJ6IRPpthcpdMBonAhhWBQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-12T05:23:52.747272Z"},"content_sha256":"50e8d2d8c2c822ea5370139c23d8e8169a314d4eaea9483314fcb71ba1b1c4e2","schema_version":"1.0","event_id":"sha256:50e8d2d8c2c822ea5370139c23d8e8169a314d4eaea9483314fcb71ba1b1c4e2"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/POOL6DU7VYLENK2VFW7JAOBZ3A/bundle.json","state_url":"https://pith.science/pith/POOL6DU7VYLENK2VFW7JAOBZ3A/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/POOL6DU7VYLENK2VFW7JAOBZ3A/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-12T05:23:52Z","links":{"resolver":"https://pith.science/pith/POOL6DU7VYLENK2VFW7JAOBZ3A","bundle":"https://pith.science/pith/POOL6DU7VYLENK2VFW7JAOBZ3A/bundle.json","state":"https://pith.science/pith/POOL6DU7VYLENK2VFW7JAOBZ3A/state.json","well_known_bundle":"https://pith.science/.well-known/pith/POOL6DU7VYLENK2VFW7JAOBZ3A/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2018:POOL6DU7VYLENK2VFW7JAOBZ3A","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"0042b4032617c5bf530f4df3e1e4abbc1f92b1b8914bebeccbd0f84b8e84f6a7","cross_cats_sorted":["cs.IR","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-11-29T21:39:44Z","title_canon_sha256":"81a393d056506017c238d812bb3afac9ae419b554a6e87783d60098f615805f3"},"schema_version":"1.0","source":{"id":"1811.12500","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1811.12500","created_at":"2026-05-17T23:59:29Z"},{"alias_kind":"arxiv_version","alias_value":"1811.12500v1","created_at":"2026-05-17T23:59:29Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1811.12500","created_at":"2026-05-17T23:59:29Z"},{"alias_kind":"pith_short_12","alias_value":"POOL6DU7VYLE","created_at":"2026-05-18T12:32:46Z"},{"alias_kind":"pith_short_16","alias_value":"POOL6DU7VYLENK2V","created_at":"2026-05-18T12:32:46Z"},{"alias_kind":"pith_short_8","alias_value":"POOL6DU7","created_at":"2026-05-18T12:32:46Z"}],"graph_snapshots":[{"event_id":"sha256:50e8d2d8c2c822ea5370139c23d8e8169a314d4eaea9483314fcb71ba1b1c4e2","target":"graph","created_at":"2026-05-17T23:59:29Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Current state-of-the-art nonparametric Bayesian text clustering methods model documents through multinomial distribution on bags of words. Although these methods can effectively utilize the word burstiness representation of documents and achieve decent performance, they do not explore the sequential information of text and relationships among synonyms. In this paper, the documents are modeled as the joint of bags of words, sequential features and word embeddings. We proposed Sequential Embedding induced Dirichlet Process Mixture Model (SiDPMM) to effectively exploit this joint document represe","authors_text":"Qi Lou, Sargur N. Srihari, Tiehang Duan, Xiaohui Xie","cross_cats":["cs.IR","stat.ML"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-11-29T21:39:44Z","title":"Sequential Embedding Induced Text Clustering, a Non-parametric Bayesian Approach"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1811.12500","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:46f3f053c0e3f0a4f171a92052605c0f7aa01c93695a7ea85a2717ac8e01a295","target":"record","created_at":"2026-05-17T23:59:29Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"0042b4032617c5bf530f4df3e1e4abbc1f92b1b8914bebeccbd0f84b8e84f6a7","cross_cats_sorted":["cs.IR","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-11-29T21:39:44Z","title_canon_sha256":"81a393d056506017c238d812bb3afac9ae419b554a6e87783d60098f615805f3"},"schema_version":"1.0","source":{"id":"1811.12500","kind":"arxiv","version":1}},"canonical_sha256":"7b9cbf0e9fae1646ab552dbe903839d82bcda521d54285ac0a86f3fc04900aa3","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"7b9cbf0e9fae1646ab552dbe903839d82bcda521d54285ac0a86f3fc04900aa3","first_computed_at":"2026-05-17T23:59:29.449809Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:59:29.449809Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"1vBSOlITMGOohb/zGrFz/xl0ez/K39zkY3xEpITLPZsDofgSL82wbzKHTWgyVCLt0UCI9vJbZiRDCcqFPAieBA==","signature_status":"signed_v1","signed_at":"2026-05-17T23:59:29.450313Z","signed_message":"canonical_sha256_bytes"},"source_id":"1811.12500","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:46f3f053c0e3f0a4f171a92052605c0f7aa01c93695a7ea85a2717ac8e01a295","sha256:50e8d2d8c2c822ea5370139c23d8e8169a314d4eaea9483314fcb71ba1b1c4e2"],"state_sha256":"9fb69c4486ed5819bba6bc26f2fab2ad58f866aca4f561a4887aec34a0b6ef33"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"kFImnHOsOfrrA4tEUp2wbnSnQSlwyvkLCuaQgOdmDxI4c6u2AqnIuZh9iPbJm/eilx5a2oGxk7/lP/GzHteDAA==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-12T05:23:52.749971Z","bundle_sha256":"73c7ef91464fde60a6dfb610231e45fa8f3c0503255157912db386811293ebdb"}}