{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2017:AZSWZMD2XRY5FLB5POTPPF6LOM","short_pith_number":"pith:AZSWZMD2","canonical_record":{"source":{"id":"1701.03227","kind":"arxiv","version":3},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2017-01-12T04:26:00Z","cross_cats_sorted":["cs.IR","cs.LG"],"title_canon_sha256":"a18884f4f313d549d198231c9c0398d2b9c27a08432b9a009204582a3025e90a","abstract_canon_sha256":"2c5e30681c752306a00e7ddf705cf5391ef5ad63708adcf8c0616e9f22953efa"},"schema_version":"1.0"},"canonical_sha256":"06656cb07abc71d2ac3d7ba6f797cb7333e31f373d5f7c82aced8c7190d48f6f","source":{"kind":"arxiv","id":"1701.03227","version":3},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1701.03227","created_at":"2026-05-18T00:32:55Z"},{"alias_kind":"arxiv_version","alias_value":"1701.03227v3","created_at":"2026-05-18T00:32:55Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1701.03227","created_at":"2026-05-18T00:32:55Z"},{"alias_kind":"pith_short_12","alias_value":"AZSWZMD2XRY5","created_at":"2026-05-18T12:31:08Z"},{"alias_kind":"pith_short_16","alias_value":"AZSWZMD2XRY5FLB5","created_at":"2026-05-18T12:31:08Z"},{"alias_kind":"pith_short_8","alias_value":"AZSWZMD2","created_at":"2026-05-18T12:31:08Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2017:AZSWZMD2XRY5FLB5POTPPF6LOM","target":"record","payload":{"canonical_record":{"source":{"id":"1701.03227","kind":"arxiv","version":3},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2017-01-12T04:26:00Z","cross_cats_sorted":["cs.IR","cs.LG"],"title_canon_sha256":"a18884f4f313d549d198231c9c0398d2b9c27a08432b9a009204582a3025e90a","abstract_canon_sha256":"2c5e30681c752306a00e7ddf705cf5391ef5ad63708adcf8c0616e9f22953efa"},"schema_version":"1.0"},"canonical_sha256":"06656cb07abc71d2ac3d7ba6f797cb7333e31f373d5f7c82aced8c7190d48f6f","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:32:55.407765Z","signature_b64":"FKkaMqDN0sama78gr8RKzuTjyxvFKhwDv44lS9OSQICuy6snKL3IQBrHco4UnJXvrvHlc4+fR1wWplhUnAkIDQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"06656cb07abc71d2ac3d7ba6f797cb7333e31f373d5f7c82aced8c7190d48f6f","last_reissued_at":"2026-05-18T00:32:55.407213Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:32:55.407213Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1701.03227","source_version":3,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:32:55Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"NFkSnaMFG0UVxSMzP9VYU3gcRpw8ocFa7opsvH0pHUCIx1mJw1Uwlcig5BfeSnlVa6iZpyZwIVXiwLTxFTx2AA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-27T15:05:35.916168Z"},"content_sha256":"72047a71a83d606bd34f3df56e55a87e8604aa21a80b7dd991cd3807ad5323c5","schema_version":"1.0","event_id":"sha256:72047a71a83d606bd34f3df56e55a87e8604aa21a80b7dd991cd3807ad5323c5"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2017:AZSWZMD2XRY5FLB5POTPPF6LOM","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Prior matters: simple and general methods for evaluating and improving topic quality in topic modeling","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.IR","cs.LG"],"primary_cat":"cs.CL","authors_text":"Angela Fan, Finale Doshi-Velez, Luke Miratrix","submitted_at":"2017-01-12T04:26:00Z","abstract_excerpt":"Latent Dirichlet Allocation (LDA) models trained without stopword removal often produce topics with high posterior probabilities on uninformative words, obscuring the underlying corpus content. Even when canonical stopwords are manually removed, uninformative words common in that corpus will still dominate the most probable words in a topic. In this work, we first show how the standard topic quality measures of coherence and pointwise mutual information act counter-intuitively in the presence of common but irrelevant words, making it difficult to even quantitatively identify situations in whic"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1701.03227","kind":"arxiv","version":3},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:32:55Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"MxLj7fW+Ti94GCWlkG1sQg3jmgqlPhvfBtFs3QSkH1+DRFamruY9H6q1ZvEkE69wj4P8vcsQgCE1i8Tmf2OyBg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-27T15:05:35.916905Z"},"content_sha256":"6597d67d573278fffb8cf34d78e6f438a5e9af1a3bcfca7536a778adb30b5be9","schema_version":"1.0","event_id":"sha256:6597d67d573278fffb8cf34d78e6f438a5e9af1a3bcfca7536a778adb30b5be9"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/AZSWZMD2XRY5FLB5POTPPF6LOM/bundle.json","state_url":"https://pith.science/pith/AZSWZMD2XRY5FLB5POTPPF6LOM/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/AZSWZMD2XRY5FLB5POTPPF6LOM/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-27T15:05:35Z","links":{"resolver":"https://pith.science/pith/AZSWZMD2XRY5FLB5POTPPF6LOM","bundle":"https://pith.science/pith/AZSWZMD2XRY5FLB5POTPPF6LOM/bundle.json","state":"https://pith.science/pith/AZSWZMD2XRY5FLB5POTPPF6LOM/state.json","well_known_bundle":"https://pith.science/.well-known/pith/AZSWZMD2XRY5FLB5POTPPF6LOM/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2017:AZSWZMD2XRY5FLB5POTPPF6LOM","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"2c5e30681c752306a00e7ddf705cf5391ef5ad63708adcf8c0616e9f22953efa","cross_cats_sorted":["cs.IR","cs.LG"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2017-01-12T04:26:00Z","title_canon_sha256":"a18884f4f313d549d198231c9c0398d2b9c27a08432b9a009204582a3025e90a"},"schema_version":"1.0","source":{"id":"1701.03227","kind":"arxiv","version":3}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1701.03227","created_at":"2026-05-18T00:32:55Z"},{"alias_kind":"arxiv_version","alias_value":"1701.03227v3","created_at":"2026-05-18T00:32:55Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1701.03227","created_at":"2026-05-18T00:32:55Z"},{"alias_kind":"pith_short_12","alias_value":"AZSWZMD2XRY5","created_at":"2026-05-18T12:31:08Z"},{"alias_kind":"pith_short_16","alias_value":"AZSWZMD2XRY5FLB5","created_at":"2026-05-18T12:31:08Z"},{"alias_kind":"pith_short_8","alias_value":"AZSWZMD2","created_at":"2026-05-18T12:31:08Z"}],"graph_snapshots":[{"event_id":"sha256:6597d67d573278fffb8cf34d78e6f438a5e9af1a3bcfca7536a778adb30b5be9","target":"graph","created_at":"2026-05-18T00:32:55Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Latent Dirichlet Allocation (LDA) models trained without stopword removal often produce topics with high posterior probabilities on uninformative words, obscuring the underlying corpus content. Even when canonical stopwords are manually removed, uninformative words common in that corpus will still dominate the most probable words in a topic. In this work, we first show how the standard topic quality measures of coherence and pointwise mutual information act counter-intuitively in the presence of common but irrelevant words, making it difficult to even quantitatively identify situations in whic","authors_text":"Angela Fan, Finale Doshi-Velez, Luke Miratrix","cross_cats":["cs.IR","cs.LG"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2017-01-12T04:26:00Z","title":"Prior matters: simple and general methods for evaluating and improving topic quality in topic modeling"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1701.03227","kind":"arxiv","version":3},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:72047a71a83d606bd34f3df56e55a87e8604aa21a80b7dd991cd3807ad5323c5","target":"record","created_at":"2026-05-18T00:32:55Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"2c5e30681c752306a00e7ddf705cf5391ef5ad63708adcf8c0616e9f22953efa","cross_cats_sorted":["cs.IR","cs.LG"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2017-01-12T04:26:00Z","title_canon_sha256":"a18884f4f313d549d198231c9c0398d2b9c27a08432b9a009204582a3025e90a"},"schema_version":"1.0","source":{"id":"1701.03227","kind":"arxiv","version":3}},"canonical_sha256":"06656cb07abc71d2ac3d7ba6f797cb7333e31f373d5f7c82aced8c7190d48f6f","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"06656cb07abc71d2ac3d7ba6f797cb7333e31f373d5f7c82aced8c7190d48f6f","first_computed_at":"2026-05-18T00:32:55.407213Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:32:55.407213Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"FKkaMqDN0sama78gr8RKzuTjyxvFKhwDv44lS9OSQICuy6snKL3IQBrHco4UnJXvrvHlc4+fR1wWplhUnAkIDQ==","signature_status":"signed_v1","signed_at":"2026-05-18T00:32:55.407765Z","signed_message":"canonical_sha256_bytes"},"source_id":"1701.03227","source_kind":"arxiv","source_version":3}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:72047a71a83d606bd34f3df56e55a87e8604aa21a80b7dd991cd3807ad5323c5","sha256:6597d67d573278fffb8cf34d78e6f438a5e9af1a3bcfca7536a778adb30b5be9"],"state_sha256":"b9c064c48e4fc24714625f5649e30451c9d3e4046bf07433dabce91f5484663b"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"CLCh5EwoJQh/TCPjN0OFLVfo/Y978d/zhcm2jPNp8sAtJ8oKfDEJS4uCY38iTGQCxR/UGv7h2PNmFLBlTqFYDQ==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-27T15:05:35.921143Z","bundle_sha256":"79e722f559aefedd4f99fcf264d9e1911f82a2bb47248301f149d537bf2a1dc0"}}