{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2014:22NAFFMJC7OI2OMTNUYEGOLWWH","short_pith_number":"pith:22NAFFMJ","canonical_record":{"source":{"id":"1408.5427","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"stat.ML","submitted_at":"2014-08-21T17:58:33Z","cross_cats_sorted":["cs.CL","cs.IR","cs.LG"],"title_canon_sha256":"1f61f535f50a72e5d5794a235f180d203f6123eec8998f46cf20d444b6f84b47","abstract_canon_sha256":"3f7f59b645e0cd340d32471744e879645feb20eb0d44175a9d06a43e149165a8"},"schema_version":"1.0"},"canonical_sha256":"d69a02958917dc8d39936d30433976b1ca887e81e4e9cc6f8c9c25db396ab74b","source":{"kind":"arxiv","id":"1408.5427","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1408.5427","created_at":"2026-05-18T02:44:25Z"},{"alias_kind":"arxiv_version","alias_value":"1408.5427v1","created_at":"2026-05-18T02:44:25Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1408.5427","created_at":"2026-05-18T02:44:25Z"},{"alias_kind":"pith_short_12","alias_value":"22NAFFMJC7OI","created_at":"2026-05-18T12:28:09Z"},{"alias_kind":"pith_short_16","alias_value":"22NAFFMJC7OI2OMT","created_at":"2026-05-18T12:28:09Z"},{"alias_kind":"pith_short_8","alias_value":"22NAFFMJ","created_at":"2026-05-18T12:28:09Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2014:22NAFFMJC7OI2OMTNUYEGOLWWH","target":"record","payload":{"canonical_record":{"source":{"id":"1408.5427","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"stat.ML","submitted_at":"2014-08-21T17:58:33Z","cross_cats_sorted":["cs.CL","cs.IR","cs.LG"],"title_canon_sha256":"1f61f535f50a72e5d5794a235f180d203f6123eec8998f46cf20d444b6f84b47","abstract_canon_sha256":"3f7f59b645e0cd340d32471744e879645feb20eb0d44175a9d06a43e149165a8"},"schema_version":"1.0"},"canonical_sha256":"d69a02958917dc8d39936d30433976b1ca887e81e4e9cc6f8c9c25db396ab74b","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T02:44:25.203988Z","signature_b64":"6Tw+3NxF2ZoOeSyhOtFDOTDE54JobaJ7QV+MHLIfZrmrXs5Gi+XUDko5vNoS3JftDYj4BC8jeC3qn2zenndxDA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"d69a02958917dc8d39936d30433976b1ca887e81e4e9cc6f8c9c25db396ab74b","last_reissued_at":"2026-05-18T02:44:25.203583Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T02:44:25.203583Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1408.5427","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T02:44:25Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"55cokH3FpszXy7e9LUGEXoSBNm/SupmmTBxdvXLGm6+6jnXv5uj30JPYEaNCIev31554nOE1MyVRgP/LRcUBCw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-06T16:03:28.496479Z"},"content_sha256":"2b52d4bb9a6e2f3f1bb6a7c8ee4a9abb16c3bfd616780a36739c46ed505382f8","schema_version":"1.0","event_id":"sha256:2b52d4bb9a6e2f3f1bb6a7c8ee4a9abb16c3bfd616780a36739c46ed505382f8"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2014:22NAFFMJC7OI2OMTNUYEGOLWWH","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"A Case Study in Text Mining: Interpreting Twitter Data From World Cup Tweets","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.CL","cs.IR","cs.LG"],"primary_cat":"stat.ML","authors_text":"Caley Johns, Carl Meyer, Carol Sadek, Daniel Godfrey, Shaina Race","submitted_at":"2014-08-21T17:58:33Z","abstract_excerpt":"Cluster analysis is a field of data analysis that extracts underlying patterns in data. One application of cluster analysis is in text-mining, the analysis of large collections of text to find similarities between documents. We used a collection of about 30,000 tweets extracted from Twitter just before the World Cup started. A common problem with real world text data is the presence of linguistic noise. In our case it would be extraneous tweets that are unrelated to dominant themes. To combat this problem, we created an algorithm that combined the DBSCAN algorithm and a consensus matrix. This "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1408.5427","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T02:44:25Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"5reP1Ak7r0KYmxqrC486R88wLO54z7DCrlP+zmdtOjZ5gAqtVRV2D0IjrL5URui4xhKNmey97kTGP0nScC5mCg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-06T16:03:28.497152Z"},"content_sha256":"5a61bec48f616a4eb986796b716e40a2f657b7c62dc633295aae0725b4f6242f","schema_version":"1.0","event_id":"sha256:5a61bec48f616a4eb986796b716e40a2f657b7c62dc633295aae0725b4f6242f"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/22NAFFMJC7OI2OMTNUYEGOLWWH/bundle.json","state_url":"https://pith.science/pith/22NAFFMJC7OI2OMTNUYEGOLWWH/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/22NAFFMJC7OI2OMTNUYEGOLWWH/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-06T16:03:28Z","links":{"resolver":"https://pith.science/pith/22NAFFMJC7OI2OMTNUYEGOLWWH","bundle":"https://pith.science/pith/22NAFFMJC7OI2OMTNUYEGOLWWH/bundle.json","state":"https://pith.science/pith/22NAFFMJC7OI2OMTNUYEGOLWWH/state.json","well_known_bundle":"https://pith.science/.well-known/pith/22NAFFMJC7OI2OMTNUYEGOLWWH/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2014:22NAFFMJC7OI2OMTNUYEGOLWWH","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"3f7f59b645e0cd340d32471744e879645feb20eb0d44175a9d06a43e149165a8","cross_cats_sorted":["cs.CL","cs.IR","cs.LG"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"stat.ML","submitted_at":"2014-08-21T17:58:33Z","title_canon_sha256":"1f61f535f50a72e5d5794a235f180d203f6123eec8998f46cf20d444b6f84b47"},"schema_version":"1.0","source":{"id":"1408.5427","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1408.5427","created_at":"2026-05-18T02:44:25Z"},{"alias_kind":"arxiv_version","alias_value":"1408.5427v1","created_at":"2026-05-18T02:44:25Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1408.5427","created_at":"2026-05-18T02:44:25Z"},{"alias_kind":"pith_short_12","alias_value":"22NAFFMJC7OI","created_at":"2026-05-18T12:28:09Z"},{"alias_kind":"pith_short_16","alias_value":"22NAFFMJC7OI2OMT","created_at":"2026-05-18T12:28:09Z"},{"alias_kind":"pith_short_8","alias_value":"22NAFFMJ","created_at":"2026-05-18T12:28:09Z"}],"graph_snapshots":[{"event_id":"sha256:5a61bec48f616a4eb986796b716e40a2f657b7c62dc633295aae0725b4f6242f","target":"graph","created_at":"2026-05-18T02:44:25Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Cluster analysis is a field of data analysis that extracts underlying patterns in data. One application of cluster analysis is in text-mining, the analysis of large collections of text to find similarities between documents. We used a collection of about 30,000 tweets extracted from Twitter just before the World Cup started. A common problem with real world text data is the presence of linguistic noise. In our case it would be extraneous tweets that are unrelated to dominant themes. To combat this problem, we created an algorithm that combined the DBSCAN algorithm and a consensus matrix. This ","authors_text":"Caley Johns, Carl Meyer, Carol Sadek, Daniel Godfrey, Shaina Race","cross_cats":["cs.CL","cs.IR","cs.LG"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"stat.ML","submitted_at":"2014-08-21T17:58:33Z","title":"A Case Study in Text Mining: Interpreting Twitter Data From World Cup Tweets"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1408.5427","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:2b52d4bb9a6e2f3f1bb6a7c8ee4a9abb16c3bfd616780a36739c46ed505382f8","target":"record","created_at":"2026-05-18T02:44:25Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"3f7f59b645e0cd340d32471744e879645feb20eb0d44175a9d06a43e149165a8","cross_cats_sorted":["cs.CL","cs.IR","cs.LG"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"stat.ML","submitted_at":"2014-08-21T17:58:33Z","title_canon_sha256":"1f61f535f50a72e5d5794a235f180d203f6123eec8998f46cf20d444b6f84b47"},"schema_version":"1.0","source":{"id":"1408.5427","kind":"arxiv","version":1}},"canonical_sha256":"d69a02958917dc8d39936d30433976b1ca887e81e4e9cc6f8c9c25db396ab74b","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"d69a02958917dc8d39936d30433976b1ca887e81e4e9cc6f8c9c25db396ab74b","first_computed_at":"2026-05-18T02:44:25.203583Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T02:44:25.203583Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"6Tw+3NxF2ZoOeSyhOtFDOTDE54JobaJ7QV+MHLIfZrmrXs5Gi+XUDko5vNoS3JftDYj4BC8jeC3qn2zenndxDA==","signature_status":"signed_v1","signed_at":"2026-05-18T02:44:25.203988Z","signed_message":"canonical_sha256_bytes"},"source_id":"1408.5427","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:2b52d4bb9a6e2f3f1bb6a7c8ee4a9abb16c3bfd616780a36739c46ed505382f8","sha256:5a61bec48f616a4eb986796b716e40a2f657b7c62dc633295aae0725b4f6242f"],"state_sha256":"c543db838347c5bcdffb4d10f3969ada7651d5b2c76281101f5d8e151a0960e4"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"LXsNTLF6aVZEGMQsHZ4gYNBlssf8DXcdv5D1rvU77BKOFzDJAW4KyTJbC24XlbQR6ZWVNW7blHgmJn+U8BaSDg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-06T16:03:28.501426Z","bundle_sha256":"e2183dff29a16e2a9392af7896a9a7ca6c9d0a9b3299544d2f0a752c105ef679"}}