{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2015:K7QZNHAOZ5TOREZUNGP453ZLSK","short_pith_number":"pith:K7QZNHAO","canonical_record":{"source":{"id":"1508.01067","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2015-08-05T13:18:51Z","cross_cats_sorted":["cs.IR"],"title_canon_sha256":"f721a7f9d37231c59addfacd13febae585da0ba006f1964e30d7603227241c90","abstract_canon_sha256":"b0e958e9cafdf71ddb4468713c8fba7c9023e2b75a7ca6aad931941732c4ffd4"},"schema_version":"1.0"},"canonical_sha256":"57e1969c0ecf66e89334699fceef2b92a5ceeb60d131e26b6757d3af71eb5181","source":{"kind":"arxiv","id":"1508.01067","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1508.01067","created_at":"2026-05-18T01:35:44Z"},{"alias_kind":"arxiv_version","alias_value":"1508.01067v1","created_at":"2026-05-18T01:35:44Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1508.01067","created_at":"2026-05-18T01:35:44Z"},{"alias_kind":"pith_short_12","alias_value":"K7QZNHAOZ5TO","created_at":"2026-05-18T12:29:27Z"},{"alias_kind":"pith_short_16","alias_value":"K7QZNHAOZ5TOREZU","created_at":"2026-05-18T12:29:27Z"},{"alias_kind":"pith_short_8","alias_value":"K7QZNHAO","created_at":"2026-05-18T12:29:27Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2015:K7QZNHAOZ5TOREZUNGP453ZLSK","target":"record","payload":{"canonical_record":{"source":{"id":"1508.01067","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2015-08-05T13:18:51Z","cross_cats_sorted":["cs.IR"],"title_canon_sha256":"f721a7f9d37231c59addfacd13febae585da0ba006f1964e30d7603227241c90","abstract_canon_sha256":"b0e958e9cafdf71ddb4468713c8fba7c9023e2b75a7ca6aad931941732c4ffd4"},"schema_version":"1.0"},"canonical_sha256":"57e1969c0ecf66e89334699fceef2b92a5ceeb60d131e26b6757d3af71eb5181","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T01:35:44.136032Z","signature_b64":"Y58nvMAI68ysVl+yGa6OGuquEQ2ejg6xA1wIr1zXiEPTLsc+wZq3osGvJbJWBE4aehTXvZieVB1KKdWLZxcaDg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"57e1969c0ecf66e89334699fceef2b92a5ceeb60d131e26b6757d3af71eb5181","last_reissued_at":"2026-05-18T01:35:44.135563Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T01:35:44.135563Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1508.01067","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T01:35:44Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"kiqHjib+tGwjyqYVwjhi/X/nzgbA4E3w//kbNgzP1wRp/gFEzvX0QsLLB2cSF2WeeNdtrEUZWJbeT5Wz3TvPCw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-31T21:07:28.062412Z"},"content_sha256":"b7c413528fa444455de6ebadd0230e95ee9b2bd495b882188126692c7012dcee","schema_version":"1.0","event_id":"sha256:b7c413528fa444455de6ebadd0230e95ee9b2bd495b882188126692c7012dcee"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2015:K7QZNHAOZ5TOREZUNGP453ZLSK","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Topic Stability over Noisy Sources","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.IR"],"primary_cat":"cs.CL","authors_text":"Derek Greene, Gerard Lynch, Jing Su, Ois\\'in Boydell","submitted_at":"2015-08-05T13:18:51Z","abstract_excerpt":"Topic modelling techniques such as LDA have recently been applied to speech transcripts and OCR output. These corpora may contain noisy or erroneous texts which may undermine topic stability. Therefore, it is important to know how well a topic modelling algorithm will perform when applied to noisy data. In this paper we show that different types of textual noise will have diverse effects on the stability of different topic models. From these observations, we propose guidelines for text corpus generation, with a focus on automatic speech transcription. We also suggest topic model selection meth"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1508.01067","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T01:35:44Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"y+yoTHn0iJGu3uQ/gC7BNvFXgZNUq+rLX57RBO2klQO+lHtb6QhCU517xO3qirV+8csN//OrZYd2c5L6nvknAw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-31T21:07:28.063027Z"},"content_sha256":"24454471a731da8422aea7eccc1f2d5b641262a2629e04e3453f8c51ff4c5c8f","schema_version":"1.0","event_id":"sha256:24454471a731da8422aea7eccc1f2d5b641262a2629e04e3453f8c51ff4c5c8f"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/K7QZNHAOZ5TOREZUNGP453ZLSK/bundle.json","state_url":"https://pith.science/pith/K7QZNHAOZ5TOREZUNGP453ZLSK/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/K7QZNHAOZ5TOREZUNGP453ZLSK/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-31T21:07:28Z","links":{"resolver":"https://pith.science/pith/K7QZNHAOZ5TOREZUNGP453ZLSK","bundle":"https://pith.science/pith/K7QZNHAOZ5TOREZUNGP453ZLSK/bundle.json","state":"https://pith.science/pith/K7QZNHAOZ5TOREZUNGP453ZLSK/state.json","well_known_bundle":"https://pith.science/.well-known/pith/K7QZNHAOZ5TOREZUNGP453ZLSK/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2015:K7QZNHAOZ5TOREZUNGP453ZLSK","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"b0e958e9cafdf71ddb4468713c8fba7c9023e2b75a7ca6aad931941732c4ffd4","cross_cats_sorted":["cs.IR"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2015-08-05T13:18:51Z","title_canon_sha256":"f721a7f9d37231c59addfacd13febae585da0ba006f1964e30d7603227241c90"},"schema_version":"1.0","source":{"id":"1508.01067","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1508.01067","created_at":"2026-05-18T01:35:44Z"},{"alias_kind":"arxiv_version","alias_value":"1508.01067v1","created_at":"2026-05-18T01:35:44Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1508.01067","created_at":"2026-05-18T01:35:44Z"},{"alias_kind":"pith_short_12","alias_value":"K7QZNHAOZ5TO","created_at":"2026-05-18T12:29:27Z"},{"alias_kind":"pith_short_16","alias_value":"K7QZNHAOZ5TOREZU","created_at":"2026-05-18T12:29:27Z"},{"alias_kind":"pith_short_8","alias_value":"K7QZNHAO","created_at":"2026-05-18T12:29:27Z"}],"graph_snapshots":[{"event_id":"sha256:24454471a731da8422aea7eccc1f2d5b641262a2629e04e3453f8c51ff4c5c8f","target":"graph","created_at":"2026-05-18T01:35:44Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Topic modelling techniques such as LDA have recently been applied to speech transcripts and OCR output. These corpora may contain noisy or erroneous texts which may undermine topic stability. Therefore, it is important to know how well a topic modelling algorithm will perform when applied to noisy data. In this paper we show that different types of textual noise will have diverse effects on the stability of different topic models. From these observations, we propose guidelines for text corpus generation, with a focus on automatic speech transcription. We also suggest topic model selection meth","authors_text":"Derek Greene, Gerard Lynch, Jing Su, Ois\\'in Boydell","cross_cats":["cs.IR"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2015-08-05T13:18:51Z","title":"Topic Stability over Noisy Sources"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1508.01067","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:b7c413528fa444455de6ebadd0230e95ee9b2bd495b882188126692c7012dcee","target":"record","created_at":"2026-05-18T01:35:44Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"b0e958e9cafdf71ddb4468713c8fba7c9023e2b75a7ca6aad931941732c4ffd4","cross_cats_sorted":["cs.IR"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2015-08-05T13:18:51Z","title_canon_sha256":"f721a7f9d37231c59addfacd13febae585da0ba006f1964e30d7603227241c90"},"schema_version":"1.0","source":{"id":"1508.01067","kind":"arxiv","version":1}},"canonical_sha256":"57e1969c0ecf66e89334699fceef2b92a5ceeb60d131e26b6757d3af71eb5181","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"57e1969c0ecf66e89334699fceef2b92a5ceeb60d131e26b6757d3af71eb5181","first_computed_at":"2026-05-18T01:35:44.135563Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T01:35:44.135563Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"Y58nvMAI68ysVl+yGa6OGuquEQ2ejg6xA1wIr1zXiEPTLsc+wZq3osGvJbJWBE4aehTXvZieVB1KKdWLZxcaDg==","signature_status":"signed_v1","signed_at":"2026-05-18T01:35:44.136032Z","signed_message":"canonical_sha256_bytes"},"source_id":"1508.01067","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:b7c413528fa444455de6ebadd0230e95ee9b2bd495b882188126692c7012dcee","sha256:24454471a731da8422aea7eccc1f2d5b641262a2629e04e3453f8c51ff4c5c8f"],"state_sha256":"bb4c99c6d3231ede4a0cc4078e17a7999d55815c5e8de3c448465873060a3336"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"owapIO3BfuhCO/c4555NMlz4nMneCz7Xvh8fQKpE7SCDE9YlPJrxCw38UCD0c5kgUv9zcexqTrpk7PH9Psa3AA==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-31T21:07:28.066279Z","bundle_sha256":"44ca6ce45d5885de19e2ac4c9881fbf190de7c30b6d16af34e4623fcab417032"}}