{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2013:KFUE6FFJYWEXC6KYFSOQS5BWI5","short_pith_number":"pith:KFUE6FFJ","canonical_record":{"source":{"id":"1311.1169","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2013-11-05T19:31:33Z","cross_cats_sorted":[],"title_canon_sha256":"23ab041b3c30c94fb8794a814ab803b235e1bc156b3aff6ddc9f9dc4e697aa99","abstract_canon_sha256":"344d8d1095d0b8b1e2b3fe54388062e5aed58ab8356639889f4b3aa9948b9be8"},"schema_version":"1.0"},"canonical_sha256":"51684f14a9c5897179582c9d097436474ab6e7e89200969af91f300a1241c63e","source":{"kind":"arxiv","id":"1311.1169","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1311.1169","created_at":"2026-05-18T03:07:55Z"},{"alias_kind":"arxiv_version","alias_value":"1311.1169v1","created_at":"2026-05-18T03:07:55Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1311.1169","created_at":"2026-05-18T03:07:55Z"},{"alias_kind":"pith_short_12","alias_value":"KFUE6FFJYWEX","created_at":"2026-05-18T12:27:49Z"},{"alias_kind":"pith_short_16","alias_value":"KFUE6FFJYWEXC6KY","created_at":"2026-05-18T12:27:49Z"},{"alias_kind":"pith_short_8","alias_value":"KFUE6FFJ","created_at":"2026-05-18T12:27:49Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2013:KFUE6FFJYWEXC6KYFSOQS5BWI5","target":"record","payload":{"canonical_record":{"source":{"id":"1311.1169","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2013-11-05T19:31:33Z","cross_cats_sorted":[],"title_canon_sha256":"23ab041b3c30c94fb8794a814ab803b235e1bc156b3aff6ddc9f9dc4e697aa99","abstract_canon_sha256":"344d8d1095d0b8b1e2b3fe54388062e5aed58ab8356639889f4b3aa9948b9be8"},"schema_version":"1.0"},"canonical_sha256":"51684f14a9c5897179582c9d097436474ab6e7e89200969af91f300a1241c63e","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T03:07:55.398872Z","signature_b64":"b5hkqY6KHAGWE9NXtQ5ovsoOwXmIGRNiKf/CBYkRQS8ZUCokeqLTYX3idihf0OMS5YBp86NWrCUydVefAj6iBw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"51684f14a9c5897179582c9d097436474ab6e7e89200969af91f300a1241c63e","last_reissued_at":"2026-05-18T03:07:55.398019Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T03:07:55.398019Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1311.1169","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T03:07:55Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"Q8AgAmHMxqGjoWCIFp7FpGVIThSWap6NE0CBVTAg0PTZauidiBAKXJ4lxNAkM0Z7OsK1N1PpW7ajzvkWEFdqAg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-30T12:05:10.882153Z"},"content_sha256":"1adb19c371f835daae15d407ce7d3ce254c59adb9895450a0a627d094b45fb10","schema_version":"1.0","event_id":"sha256:1adb19c371f835daae15d407ce7d3ce254c59adb9895450a0a627d094b45fb10"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2013:KFUE6FFJYWEXC6KYFSOQS5BWI5","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Using Robust PCA to estimate regional characteristics of language use from geo-tagged Twitter messages","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"D\\'aniel Kondor, G\\'abor Vattay, Istv\\'an Csabai, J\\'anos Sz\\\"ule, L\\'aszl\\'o Dobos, Norbert Barankai, Tam\\'as Hanyecz, Tam\\'as Seb\\H{o}k, Zs\\'ofia Kallus","submitted_at":"2013-11-05T19:31:33Z","abstract_excerpt":"Principal component analysis (PCA) and related techniques have been successfully employed in natural language processing. Text mining applications in the age of the online social media (OSM) face new challenges due to properties specific to these use cases (e.g. spelling issues specific to texts posted by users, the presence of spammers and bots, service announcements, etc.). In this paper, we employ a Robust PCA technique to separate typical outliers and highly localized topics from the low-dimensional structure present in language use in online social networks. Our focus is on identifying ge"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1311.1169","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T03:07:55Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"0NDXlsj+WqdlcQ4CTCwaHLm3CxX3l3p3yjU4m17r3UD0p0veimQKZpo5ln1cpJ5a5zBZYsZCiazezAgMq/HTBQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-30T12:05:10.882814Z"},"content_sha256":"06e47b9ac8d8f937b03be96a9d4052aeb9e34dfc4aab2739cc3b472cb9c2f5ed","schema_version":"1.0","event_id":"sha256:06e47b9ac8d8f937b03be96a9d4052aeb9e34dfc4aab2739cc3b472cb9c2f5ed"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/KFUE6FFJYWEXC6KYFSOQS5BWI5/bundle.json","state_url":"https://pith.science/pith/KFUE6FFJYWEXC6KYFSOQS5BWI5/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/KFUE6FFJYWEXC6KYFSOQS5BWI5/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-30T12:05:10Z","links":{"resolver":"https://pith.science/pith/KFUE6FFJYWEXC6KYFSOQS5BWI5","bundle":"https://pith.science/pith/KFUE6FFJYWEXC6KYFSOQS5BWI5/bundle.json","state":"https://pith.science/pith/KFUE6FFJYWEXC6KYFSOQS5BWI5/state.json","well_known_bundle":"https://pith.science/.well-known/pith/KFUE6FFJYWEXC6KYFSOQS5BWI5/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2013:KFUE6FFJYWEXC6KYFSOQS5BWI5","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"344d8d1095d0b8b1e2b3fe54388062e5aed58ab8356639889f4b3aa9948b9be8","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2013-11-05T19:31:33Z","title_canon_sha256":"23ab041b3c30c94fb8794a814ab803b235e1bc156b3aff6ddc9f9dc4e697aa99"},"schema_version":"1.0","source":{"id":"1311.1169","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1311.1169","created_at":"2026-05-18T03:07:55Z"},{"alias_kind":"arxiv_version","alias_value":"1311.1169v1","created_at":"2026-05-18T03:07:55Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1311.1169","created_at":"2026-05-18T03:07:55Z"},{"alias_kind":"pith_short_12","alias_value":"KFUE6FFJYWEX","created_at":"2026-05-18T12:27:49Z"},{"alias_kind":"pith_short_16","alias_value":"KFUE6FFJYWEXC6KY","created_at":"2026-05-18T12:27:49Z"},{"alias_kind":"pith_short_8","alias_value":"KFUE6FFJ","created_at":"2026-05-18T12:27:49Z"}],"graph_snapshots":[{"event_id":"sha256:06e47b9ac8d8f937b03be96a9d4052aeb9e34dfc4aab2739cc3b472cb9c2f5ed","target":"graph","created_at":"2026-05-18T03:07:55Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Principal component analysis (PCA) and related techniques have been successfully employed in natural language processing. Text mining applications in the age of the online social media (OSM) face new challenges due to properties specific to these use cases (e.g. spelling issues specific to texts posted by users, the presence of spammers and bots, service announcements, etc.). In this paper, we employ a Robust PCA technique to separate typical outliers and highly localized topics from the low-dimensional structure present in language use in online social networks. Our focus is on identifying ge","authors_text":"D\\'aniel Kondor, G\\'abor Vattay, Istv\\'an Csabai, J\\'anos Sz\\\"ule, L\\'aszl\\'o Dobos, Norbert Barankai, Tam\\'as Hanyecz, Tam\\'as Seb\\H{o}k, Zs\\'ofia Kallus","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2013-11-05T19:31:33Z","title":"Using Robust PCA to estimate regional characteristics of language use from geo-tagged Twitter messages"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1311.1169","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:1adb19c371f835daae15d407ce7d3ce254c59adb9895450a0a627d094b45fb10","target":"record","created_at":"2026-05-18T03:07:55Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"344d8d1095d0b8b1e2b3fe54388062e5aed58ab8356639889f4b3aa9948b9be8","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2013-11-05T19:31:33Z","title_canon_sha256":"23ab041b3c30c94fb8794a814ab803b235e1bc156b3aff6ddc9f9dc4e697aa99"},"schema_version":"1.0","source":{"id":"1311.1169","kind":"arxiv","version":1}},"canonical_sha256":"51684f14a9c5897179582c9d097436474ab6e7e89200969af91f300a1241c63e","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"51684f14a9c5897179582c9d097436474ab6e7e89200969af91f300a1241c63e","first_computed_at":"2026-05-18T03:07:55.398019Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T03:07:55.398019Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"b5hkqY6KHAGWE9NXtQ5ovsoOwXmIGRNiKf/CBYkRQS8ZUCokeqLTYX3idihf0OMS5YBp86NWrCUydVefAj6iBw==","signature_status":"signed_v1","signed_at":"2026-05-18T03:07:55.398872Z","signed_message":"canonical_sha256_bytes"},"source_id":"1311.1169","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:1adb19c371f835daae15d407ce7d3ce254c59adb9895450a0a627d094b45fb10","sha256:06e47b9ac8d8f937b03be96a9d4052aeb9e34dfc4aab2739cc3b472cb9c2f5ed"],"state_sha256":"1c77d8d304c9b6b622d97c4e8ee534094968464b4ec4224ab94207bd6f4efc35"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"YLgwjk/umVTSwmNqQoTpGvoGtbTNbekau4Z0f/2kJrExAiMmUWODsVLOF3ZzHdSANE05NslNJSVrJfgSK/b0Dg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-30T12:05:10.886462Z","bundle_sha256":"d14a0ebf89b83e29c9e4b298ee035528a288aafc067b288e059d836d80ec5f6d"}}