{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2012:EJ5OSTLIXCZFQTZPG7UM52N6E4","short_pith_number":"pith:EJ5OSTLI","canonical_record":{"source":{"id":"1206.4631","kind":"arxiv","version":3},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2012-06-18T15:11:38Z","cross_cats_sorted":["cs.CL","cs.IR","stat.ME","stat.ML"],"title_canon_sha256":"ca895f818d68960f4228db600adce00f615b36d28b9b12a98186231652681fc9","abstract_canon_sha256":"90f8bf9d93de5194ca3becf4ef5b474d6c4036974f85a456275a4a635972392e"},"schema_version":"1.0"},"canonical_sha256":"227ae94d68b8b2584f2f37e8cee9be2721f7ad325ec8deb754137b5e5f00e8ef","source":{"kind":"arxiv","id":"1206.4631","version":3},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1206.4631","created_at":"2026-05-18T02:46:31Z"},{"alias_kind":"arxiv_version","alias_value":"1206.4631v3","created_at":"2026-05-18T02:46:31Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1206.4631","created_at":"2026-05-18T02:46:31Z"},{"alias_kind":"pith_short_12","alias_value":"EJ5OSTLIXCZF","created_at":"2026-05-18T12:27:04Z"},{"alias_kind":"pith_short_16","alias_value":"EJ5OSTLIXCZFQTZP","created_at":"2026-05-18T12:27:04Z"},{"alias_kind":"pith_short_8","alias_value":"EJ5OSTLI","created_at":"2026-05-18T12:27:04Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2012:EJ5OSTLIXCZFQTZPG7UM52N6E4","target":"record","payload":{"canonical_record":{"source":{"id":"1206.4631","kind":"arxiv","version":3},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2012-06-18T15:11:38Z","cross_cats_sorted":["cs.CL","cs.IR","stat.ME","stat.ML"],"title_canon_sha256":"ca895f818d68960f4228db600adce00f615b36d28b9b12a98186231652681fc9","abstract_canon_sha256":"90f8bf9d93de5194ca3becf4ef5b474d6c4036974f85a456275a4a635972392e"},"schema_version":"1.0"},"canonical_sha256":"227ae94d68b8b2584f2f37e8cee9be2721f7ad325ec8deb754137b5e5f00e8ef","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T02:46:31.591066Z","signature_b64":"ttlFbPVHqb1c9nz9sj8MlVzhbsfK2j+n8a6bI6w2f22i6QaSctepcbkAegX9/JJKQqMC2jhbY1NSGiEdP2Y/CQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"227ae94d68b8b2584f2f37e8cee9be2721f7ad325ec8deb754137b5e5f00e8ef","last_reissued_at":"2026-05-18T02:46:31.590618Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T02:46:31.590618Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1206.4631","source_version":3,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T02:46:31Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"2vJRJFUhHb0MVuCnL6YTWlYh5qC875FHeh0ct0fLFpsxFbZ7xVxfYQNA3BZoZFyJUksUOtas2WEKlbFDPzs8Bw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-01T23:59:25.316136Z"},"content_sha256":"c85a9ca4c3824f3e64ab0a0aa89d284717118057db4ff2c54de90d5435adf90a","schema_version":"1.0","event_id":"sha256:c85a9ca4c3824f3e64ab0a0aa89d284717118057db4ff2c54de90d5435adf90a"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2012:EJ5OSTLIXCZFQTZPG7UM52N6E4","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"A Poisson convolution model for characterizing topical content with word frequency and exclusivity","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.CL","cs.IR","stat.ME","stat.ML"],"primary_cat":"cs.LG","authors_text":"Edoardo M Airoldi, Jonathan M Bischof","submitted_at":"2012-06-18T15:11:38Z","abstract_excerpt":"An ongoing challenge in the analysis of document collections is how to summarize content in terms of a set of inferred themes that can be interpreted substantively in terms of topics. The current practice of parametrizing the themes in terms of most frequent words limits interpretability by ignoring the differential use of words across topics. We argue that words that are both common and exclusive to a theme are more effective at characterizing topical content. We consider a setting where professional editors have annotated documents to a collection of topic categories, organized into a tree, "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1206.4631","kind":"arxiv","version":3},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T02:46:31Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"nDV1VpyDvpmfUCWJySeUoWslUE2rnJywK+YPfXnbV0PP0AUDQOH6uDVbo+gHIviaiesSuP1PJlD13UXRpIqPAg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-01T23:59:25.316495Z"},"content_sha256":"5a2ca48c31f75ca7bd0a0058f52e18e97f83865e5ac51c07c42180a3d7a7ef52","schema_version":"1.0","event_id":"sha256:5a2ca48c31f75ca7bd0a0058f52e18e97f83865e5ac51c07c42180a3d7a7ef52"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/EJ5OSTLIXCZFQTZPG7UM52N6E4/bundle.json","state_url":"https://pith.science/pith/EJ5OSTLIXCZFQTZPG7UM52N6E4/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/EJ5OSTLIXCZFQTZPG7UM52N6E4/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-01T23:59:25Z","links":{"resolver":"https://pith.science/pith/EJ5OSTLIXCZFQTZPG7UM52N6E4","bundle":"https://pith.science/pith/EJ5OSTLIXCZFQTZPG7UM52N6E4/bundle.json","state":"https://pith.science/pith/EJ5OSTLIXCZFQTZPG7UM52N6E4/state.json","well_known_bundle":"https://pith.science/.well-known/pith/EJ5OSTLIXCZFQTZPG7UM52N6E4/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2012:EJ5OSTLIXCZFQTZPG7UM52N6E4","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"90f8bf9d93de5194ca3becf4ef5b474d6c4036974f85a456275a4a635972392e","cross_cats_sorted":["cs.CL","cs.IR","stat.ME","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2012-06-18T15:11:38Z","title_canon_sha256":"ca895f818d68960f4228db600adce00f615b36d28b9b12a98186231652681fc9"},"schema_version":"1.0","source":{"id":"1206.4631","kind":"arxiv","version":3}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1206.4631","created_at":"2026-05-18T02:46:31Z"},{"alias_kind":"arxiv_version","alias_value":"1206.4631v3","created_at":"2026-05-18T02:46:31Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1206.4631","created_at":"2026-05-18T02:46:31Z"},{"alias_kind":"pith_short_12","alias_value":"EJ5OSTLIXCZF","created_at":"2026-05-18T12:27:04Z"},{"alias_kind":"pith_short_16","alias_value":"EJ5OSTLIXCZFQTZP","created_at":"2026-05-18T12:27:04Z"},{"alias_kind":"pith_short_8","alias_value":"EJ5OSTLI","created_at":"2026-05-18T12:27:04Z"}],"graph_snapshots":[{"event_id":"sha256:5a2ca48c31f75ca7bd0a0058f52e18e97f83865e5ac51c07c42180a3d7a7ef52","target":"graph","created_at":"2026-05-18T02:46:31Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"An ongoing challenge in the analysis of document collections is how to summarize content in terms of a set of inferred themes that can be interpreted substantively in terms of topics. The current practice of parametrizing the themes in terms of most frequent words limits interpretability by ignoring the differential use of words across topics. We argue that words that are both common and exclusive to a theme are more effective at characterizing topical content. We consider a setting where professional editors have annotated documents to a collection of topic categories, organized into a tree, ","authors_text":"Edoardo M Airoldi, Jonathan M Bischof","cross_cats":["cs.CL","cs.IR","stat.ME","stat.ML"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2012-06-18T15:11:38Z","title":"A Poisson convolution model for characterizing topical content with word frequency and exclusivity"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1206.4631","kind":"arxiv","version":3},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:c85a9ca4c3824f3e64ab0a0aa89d284717118057db4ff2c54de90d5435adf90a","target":"record","created_at":"2026-05-18T02:46:31Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"90f8bf9d93de5194ca3becf4ef5b474d6c4036974f85a456275a4a635972392e","cross_cats_sorted":["cs.CL","cs.IR","stat.ME","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2012-06-18T15:11:38Z","title_canon_sha256":"ca895f818d68960f4228db600adce00f615b36d28b9b12a98186231652681fc9"},"schema_version":"1.0","source":{"id":"1206.4631","kind":"arxiv","version":3}},"canonical_sha256":"227ae94d68b8b2584f2f37e8cee9be2721f7ad325ec8deb754137b5e5f00e8ef","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"227ae94d68b8b2584f2f37e8cee9be2721f7ad325ec8deb754137b5e5f00e8ef","first_computed_at":"2026-05-18T02:46:31.590618Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T02:46:31.590618Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"ttlFbPVHqb1c9nz9sj8MlVzhbsfK2j+n8a6bI6w2f22i6QaSctepcbkAegX9/JJKQqMC2jhbY1NSGiEdP2Y/CQ==","signature_status":"signed_v1","signed_at":"2026-05-18T02:46:31.591066Z","signed_message":"canonical_sha256_bytes"},"source_id":"1206.4631","source_kind":"arxiv","source_version":3}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:c85a9ca4c3824f3e64ab0a0aa89d284717118057db4ff2c54de90d5435adf90a","sha256:5a2ca48c31f75ca7bd0a0058f52e18e97f83865e5ac51c07c42180a3d7a7ef52"],"state_sha256":"fbcb7aec4e4712a32b4f87731cb6c452e36779d59c91231addeee092cb231a93"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"7TpSpitCwLBA2ah2l57Zyf2X/2o1xPEVfghD/i1Qn45RT6qHSLMjHogy81RwcJs2rJD/7/1lxrIlVmI05iTsCg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-01T23:59:25.318292Z","bundle_sha256":"4c82bf4a407543bbaff5fc0c5ade04c672d49b59c4b4fb6757d3eecde917d8f3"}}