{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2019:CWBXVSEJZFZ4KK2VONCOCSHEHU","short_pith_number":"pith:CWBXVSEJ","canonical_record":{"source":{"id":"1907.01636","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.IR","submitted_at":"2019-06-17T06:59:25Z","cross_cats_sorted":["cs.CL","cs.LG","stat.ML"],"title_canon_sha256":"0273f1c46c6581c1466e0defc6e544aeb3f39767e51f7158ffb914ed28955e87","abstract_canon_sha256":"e4a410e8ab972387a1c052e41d8147783126b0b1f7c30ffbc3c52ede106d6ae1"},"schema_version":"1.0"},"canonical_sha256":"15837ac889c973c52b557344e148e43d104a004dbd6456722f98a126f7d28278","source":{"kind":"arxiv","id":"1907.01636","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1907.01636","created_at":"2026-05-17T23:41:35Z"},{"alias_kind":"arxiv_version","alias_value":"1907.01636v1","created_at":"2026-05-17T23:41:35Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1907.01636","created_at":"2026-05-17T23:41:35Z"},{"alias_kind":"pith_short_12","alias_value":"CWBXVSEJZFZ4","created_at":"2026-05-18T12:33:15Z"},{"alias_kind":"pith_short_16","alias_value":"CWBXVSEJZFZ4KK2V","created_at":"2026-05-18T12:33:15Z"},{"alias_kind":"pith_short_8","alias_value":"CWBXVSEJ","created_at":"2026-05-18T12:33:15Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2019:CWBXVSEJZFZ4KK2VONCOCSHEHU","target":"record","payload":{"canonical_record":{"source":{"id":"1907.01636","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.IR","submitted_at":"2019-06-17T06:59:25Z","cross_cats_sorted":["cs.CL","cs.LG","stat.ML"],"title_canon_sha256":"0273f1c46c6581c1466e0defc6e544aeb3f39767e51f7158ffb914ed28955e87","abstract_canon_sha256":"e4a410e8ab972387a1c052e41d8147783126b0b1f7c30ffbc3c52ede106d6ae1"},"schema_version":"1.0"},"canonical_sha256":"15837ac889c973c52b557344e148e43d104a004dbd6456722f98a126f7d28278","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:41:35.689433Z","signature_b64":"31yOsDKlIh8TfL18W4jnbDYIqcDNMXo3ku4sjm/mAaSOE67nygjZbQqTYdJJO3d2oCmxzZGmzUeM91zVfYfWAw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"15837ac889c973c52b557344e148e43d104a004dbd6456722f98a126f7d28278","last_reissued_at":"2026-05-17T23:41:35.688813Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:41:35.688813Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1907.01636","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:41:35Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"K/1LPWTmsFd4dW/iehiq6TGqwv+wkL7QqzIWqbByS9d5Xj3RX25buvzkPfLzAsn2Nn4ZDvfpPRaSWxogjOjWDQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-02T23:12:25.882929Z"},"content_sha256":"54050e57d633f5b34e74fd7c6dd97d990d92252313c7e4134c3a732610744332","schema_version":"1.0","event_id":"sha256:54050e57d633f5b34e74fd7c6dd97d990d92252313c7e4134c3a732610744332"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2019:CWBXVSEJZFZ4KK2VONCOCSHEHU","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Analyses of Multi-collection Corpora via Compound Topic Modeling","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.CL","cs.LG","stat.ML"],"primary_cat":"cs.IR","authors_text":"Clint P. George, George Michailidis, Wei Xia","submitted_at":"2019-06-17T06:59:25Z","abstract_excerpt":"As electronically stored data grow in daily life, obtaining novel and relevant information becomes challenging in text mining. Thus people have sought statistical methods based on term frequency, matrix algebra, or topic modeling for text mining. Popular topic models have centered on one single text collection, which is deficient for comparative text analyses. We consider a setting where one can partition the corpus into subcollections. Each subcollection shares a common set of topics, but there exists relative variation in topic proportions among collections. Including any prior knowledge abo"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1907.01636","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:41:35Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"yWIfuHW9z25Vi7H0oT9K6tq5hILAvtkwMliPXB3NpwtS9GuKt7Lj2rSflEOJqWmAMfa4ISFJ6JmEpr37YQlUDQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-02T23:12:25.883277Z"},"content_sha256":"c1a23c88af4c01e8bf578d8d4b40eaebbc75261380c12534b87a260df58a780c","schema_version":"1.0","event_id":"sha256:c1a23c88af4c01e8bf578d8d4b40eaebbc75261380c12534b87a260df58a780c"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/CWBXVSEJZFZ4KK2VONCOCSHEHU/bundle.json","state_url":"https://pith.science/pith/CWBXVSEJZFZ4KK2VONCOCSHEHU/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/CWBXVSEJZFZ4KK2VONCOCSHEHU/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-02T23:12:25Z","links":{"resolver":"https://pith.science/pith/CWBXVSEJZFZ4KK2VONCOCSHEHU","bundle":"https://pith.science/pith/CWBXVSEJZFZ4KK2VONCOCSHEHU/bundle.json","state":"https://pith.science/pith/CWBXVSEJZFZ4KK2VONCOCSHEHU/state.json","well_known_bundle":"https://pith.science/.well-known/pith/CWBXVSEJZFZ4KK2VONCOCSHEHU/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2019:CWBXVSEJZFZ4KK2VONCOCSHEHU","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"e4a410e8ab972387a1c052e41d8147783126b0b1f7c30ffbc3c52ede106d6ae1","cross_cats_sorted":["cs.CL","cs.LG","stat.ML"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.IR","submitted_at":"2019-06-17T06:59:25Z","title_canon_sha256":"0273f1c46c6581c1466e0defc6e544aeb3f39767e51f7158ffb914ed28955e87"},"schema_version":"1.0","source":{"id":"1907.01636","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1907.01636","created_at":"2026-05-17T23:41:35Z"},{"alias_kind":"arxiv_version","alias_value":"1907.01636v1","created_at":"2026-05-17T23:41:35Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1907.01636","created_at":"2026-05-17T23:41:35Z"},{"alias_kind":"pith_short_12","alias_value":"CWBXVSEJZFZ4","created_at":"2026-05-18T12:33:15Z"},{"alias_kind":"pith_short_16","alias_value":"CWBXVSEJZFZ4KK2V","created_at":"2026-05-18T12:33:15Z"},{"alias_kind":"pith_short_8","alias_value":"CWBXVSEJ","created_at":"2026-05-18T12:33:15Z"}],"graph_snapshots":[{"event_id":"sha256:c1a23c88af4c01e8bf578d8d4b40eaebbc75261380c12534b87a260df58a780c","target":"graph","created_at":"2026-05-17T23:41:35Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"As electronically stored data grow in daily life, obtaining novel and relevant information becomes challenging in text mining. Thus people have sought statistical methods based on term frequency, matrix algebra, or topic modeling for text mining. Popular topic models have centered on one single text collection, which is deficient for comparative text analyses. We consider a setting where one can partition the corpus into subcollections. Each subcollection shares a common set of topics, but there exists relative variation in topic proportions among collections. Including any prior knowledge abo","authors_text":"Clint P. George, George Michailidis, Wei Xia","cross_cats":["cs.CL","cs.LG","stat.ML"],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.IR","submitted_at":"2019-06-17T06:59:25Z","title":"Analyses of Multi-collection Corpora via Compound Topic Modeling"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1907.01636","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:54050e57d633f5b34e74fd7c6dd97d990d92252313c7e4134c3a732610744332","target":"record","created_at":"2026-05-17T23:41:35Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"e4a410e8ab972387a1c052e41d8147783126b0b1f7c30ffbc3c52ede106d6ae1","cross_cats_sorted":["cs.CL","cs.LG","stat.ML"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.IR","submitted_at":"2019-06-17T06:59:25Z","title_canon_sha256":"0273f1c46c6581c1466e0defc6e544aeb3f39767e51f7158ffb914ed28955e87"},"schema_version":"1.0","source":{"id":"1907.01636","kind":"arxiv","version":1}},"canonical_sha256":"15837ac889c973c52b557344e148e43d104a004dbd6456722f98a126f7d28278","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"15837ac889c973c52b557344e148e43d104a004dbd6456722f98a126f7d28278","first_computed_at":"2026-05-17T23:41:35.688813Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:41:35.688813Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"31yOsDKlIh8TfL18W4jnbDYIqcDNMXo3ku4sjm/mAaSOE67nygjZbQqTYdJJO3d2oCmxzZGmzUeM91zVfYfWAw==","signature_status":"signed_v1","signed_at":"2026-05-17T23:41:35.689433Z","signed_message":"canonical_sha256_bytes"},"source_id":"1907.01636","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:54050e57d633f5b34e74fd7c6dd97d990d92252313c7e4134c3a732610744332","sha256:c1a23c88af4c01e8bf578d8d4b40eaebbc75261380c12534b87a260df58a780c"],"state_sha256":"9336997bd0a2dbb232edf412cb9368045be1dbb9148b643f26cb86eeb639f577"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"wvMOAuI0FqwFEtfC0wwgRNAgz17s+aCR7LWss1iF0cfxlrKcrHOb9f80gNHtOZP6wqd4dSGCBMfXYA2p6FbYAQ==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-02T23:12:25.885197Z","bundle_sha256":"235186e09219da8cca598b3bdd17b0acb19964f256ad94a67af5b0261c1f3cfe"}}