{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2014:74DHEVFXRULRWT4XY5GDVVJFEA","short_pith_number":"pith:74DHEVFX","schema_version":"1.0","canonical_sha256":"ff067254b78d171b4f97c74c3ad52520252e56e14e18e7e3bdfefbdb05535732","source":{"kind":"arxiv","id":"1407.7094","version":1},"attestation_state":"computed","paper":{"title":"Crowdsourcing Dialect Characterization through Twitter","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.CL","cs.SI","stat.ML"],"primary_cat":"physics.soc-ph","authors_text":"Bruno Gon\\c{c}alves, David S\\'anchez","submitted_at":"2014-07-26T04:16:31Z","abstract_excerpt":"We perform a large-scale analysis of language diatopic variation using geotagged microblogging datasets. By collecting all Twitter messages written in Spanish over more than two years, we build a corpus from which a carefully selected list of concepts allows us to characterize Spanish varieties on a global scale. A cluster analysis proves the existence of well defined macroregions sharing common lexical properties. Remarkably enough, we find that Spanish language is split into two superdialects, namely, an urban speech used across major American and Spanish citites and a diverse form that enco"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1407.7094","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"physics.soc-ph","submitted_at":"2014-07-26T04:16:31Z","cross_cats_sorted":["cs.CL","cs.SI","stat.ML"],"title_canon_sha256":"0e1245b0f11c5b45fba86ff003f1b6971bcfe75915cf0af70bc2904e64157fe8","abstract_canon_sha256":"64c322c6c9431ab7f2aab362b570508af27edfd3a199d4ddea9395f8893199a8"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T02:34:44.794985Z","signature_b64":"W5VpDO+eIuE9Yf+PFHM5hnc3xaRgsSQkLcCMtDTM8ObzeOFPTNGGIqyUob1scLVXLfEZtlJ3My4/yN+8KxMlBw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"ff067254b78d171b4f97c74c3ad52520252e56e14e18e7e3bdfefbdb05535732","last_reissued_at":"2026-05-18T02:34:44.794510Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T02:34:44.794510Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Crowdsourcing Dialect Characterization through Twitter","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.CL","cs.SI","stat.ML"],"primary_cat":"physics.soc-ph","authors_text":"Bruno Gon\\c{c}alves, David S\\'anchez","submitted_at":"2014-07-26T04:16:31Z","abstract_excerpt":"We perform a large-scale analysis of language diatopic variation using geotagged microblogging datasets. By collecting all Twitter messages written in Spanish over more than two years, we build a corpus from which a carefully selected list of concepts allows us to characterize Spanish varieties on a global scale. A cluster analysis proves the existence of well defined macroregions sharing common lexical properties. Remarkably enough, we find that Spanish language is split into two superdialects, namely, an urban speech used across major American and Spanish citites and a diverse form that enco"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1407.7094","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1407.7094","created_at":"2026-05-18T02:34:44.794575+00:00"},{"alias_kind":"arxiv_version","alias_value":"1407.7094v1","created_at":"2026-05-18T02:34:44.794575+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1407.7094","created_at":"2026-05-18T02:34:44.794575+00:00"},{"alias_kind":"pith_short_12","alias_value":"74DHEVFXRULR","created_at":"2026-05-18T12:28:16.859392+00:00"},{"alias_kind":"pith_short_16","alias_value":"74DHEVFXRULRWT4X","created_at":"2026-05-18T12:28:16.859392+00:00"},{"alias_kind":"pith_short_8","alias_value":"74DHEVFX","created_at":"2026-05-18T12:28:16.859392+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/74DHEVFXRULRWT4XY5GDVVJFEA","json":"https://pith.science/pith/74DHEVFXRULRWT4XY5GDVVJFEA.json","graph_json":"https://pith.science/api/pith-number/74DHEVFXRULRWT4XY5GDVVJFEA/graph.json","events_json":"https://pith.science/api/pith-number/74DHEVFXRULRWT4XY5GDVVJFEA/events.json","paper":"https://pith.science/paper/74DHEVFX"},"agent_actions":{"view_html":"https://pith.science/pith/74DHEVFXRULRWT4XY5GDVVJFEA","download_json":"https://pith.science/pith/74DHEVFXRULRWT4XY5GDVVJFEA.json","view_paper":"https://pith.science/paper/74DHEVFX","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1407.7094&json=true","fetch_graph":"https://pith.science/api/pith-number/74DHEVFXRULRWT4XY5GDVVJFEA/graph.json","fetch_events":"https://pith.science/api/pith-number/74DHEVFXRULRWT4XY5GDVVJFEA/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/74DHEVFXRULRWT4XY5GDVVJFEA/action/timestamp_anchor","attest_storage":"https://pith.science/pith/74DHEVFXRULRWT4XY5GDVVJFEA/action/storage_attestation","attest_author":"https://pith.science/pith/74DHEVFXRULRWT4XY5GDVVJFEA/action/author_attestation","sign_citation":"https://pith.science/pith/74DHEVFXRULRWT4XY5GDVVJFEA/action/citation_signature","submit_replication":"https://pith.science/pith/74DHEVFXRULRWT4XY5GDVVJFEA/action/replication_record"}},"created_at":"2026-05-18T02:34:44.794575+00:00","updated_at":"2026-05-18T02:34:44.794575+00:00"}