{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2023:YHT5ZVAK3ZPMJQ4G4THP22MMT2","short_pith_number":"pith:YHT5ZVAK","schema_version":"1.0","canonical_sha256":"c1e7dcd40ade5ec4c386e4cefd698c9eb17e0fa7df4acb5338c8b9864e9c574d","source":{"kind":"arxiv","id":"2310.04452","version":1},"attestation_state":"computed","paper":{"title":"Short text classification with machine learning in the social sciences: The case of climate change on Twitter","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.LG","stat.ME"],"primary_cat":"cs.CL","authors_text":"Karina Shyrokykh, Lisa Dellmuth, Maksym Girnyk","submitted_at":"2023-10-03T22:09:43Z","abstract_excerpt":"To analyse large numbers of texts, social science researchers are increasingly confronting the challenge of text classification. When manual labeling is not possible and researchers have to find automatized ways to classify texts, computer science provides a useful toolbox of machine-learning methods whose performance remains understudied in the social sciences. In this article, we compare the performance of the most widely used text classifiers by applying them to a typical research scenario in social science research: a relatively small labeled dataset with infrequent occurrence of categorie"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2310.04452","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2023-10-03T22:09:43Z","cross_cats_sorted":["cs.LG","stat.ME"],"title_canon_sha256":"afb149ce0d176f89daaf5edd8a1d344a3839d62cd115339666c7684c5451bcc4","abstract_canon_sha256":"925b3566659d51785d1b11cc127ac685300eb3cabe6006e70dcb5138cf3eb895"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-07-05T06:58:18.474412Z","signature_b64":"UX+pVwLwpKsdw5+LT40YP+8ceelw5nr5cmiHWcXkUUsid0LYTv4cTh3n0gWLOYgFzHThl5xoOIXFKa8IJHnjAQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"c1e7dcd40ade5ec4c386e4cefd698c9eb17e0fa7df4acb5338c8b9864e9c574d","last_reissued_at":"2026-07-05T06:58:18.473974Z","signature_status":"signed_v1","first_computed_at":"2026-07-05T06:58:18.473974Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Short text classification with machine learning in the social sciences: The case of climate change on Twitter","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.LG","stat.ME"],"primary_cat":"cs.CL","authors_text":"Karina Shyrokykh, Lisa Dellmuth, Maksym Girnyk","submitted_at":"2023-10-03T22:09:43Z","abstract_excerpt":"To analyse large numbers of texts, social science researchers are increasingly confronting the challenge of text classification. When manual labeling is not possible and researchers have to find automatized ways to classify texts, computer science provides a useful toolbox of machine-learning methods whose performance remains understudied in the social sciences. In this article, we compare the performance of the most widely used text classifiers by applying them to a typical research scenario in social science research: a relatively small labeled dataset with infrequent occurrence of categorie"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2310.04452","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2310.04452/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2310.04452","created_at":"2026-07-05T06:58:18.474029+00:00"},{"alias_kind":"arxiv_version","alias_value":"2310.04452v1","created_at":"2026-07-05T06:58:18.474029+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2310.04452","created_at":"2026-07-05T06:58:18.474029+00:00"},{"alias_kind":"pith_short_12","alias_value":"YHT5ZVAK3ZPM","created_at":"2026-07-05T06:58:18.474029+00:00"},{"alias_kind":"pith_short_16","alias_value":"YHT5ZVAK3ZPMJQ4G","created_at":"2026-07-05T06:58:18.474029+00:00"},{"alias_kind":"pith_short_8","alias_value":"YHT5ZVAK","created_at":"2026-07-05T06:58:18.474029+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/YHT5ZVAK3ZPMJQ4G4THP22MMT2","json":"https://pith.science/pith/YHT5ZVAK3ZPMJQ4G4THP22MMT2.json","graph_json":"https://pith.science/api/pith-number/YHT5ZVAK3ZPMJQ4G4THP22MMT2/graph.json","events_json":"https://pith.science/api/pith-number/YHT5ZVAK3ZPMJQ4G4THP22MMT2/events.json","paper":"https://pith.science/paper/YHT5ZVAK"},"agent_actions":{"view_html":"https://pith.science/pith/YHT5ZVAK3ZPMJQ4G4THP22MMT2","download_json":"https://pith.science/pith/YHT5ZVAK3ZPMJQ4G4THP22MMT2.json","view_paper":"https://pith.science/paper/YHT5ZVAK","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2310.04452&json=true","fetch_graph":"https://pith.science/api/pith-number/YHT5ZVAK3ZPMJQ4G4THP22MMT2/graph.json","fetch_events":"https://pith.science/api/pith-number/YHT5ZVAK3ZPMJQ4G4THP22MMT2/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/YHT5ZVAK3ZPMJQ4G4THP22MMT2/action/timestamp_anchor","attest_storage":"https://pith.science/pith/YHT5ZVAK3ZPMJQ4G4THP22MMT2/action/storage_attestation","attest_author":"https://pith.science/pith/YHT5ZVAK3ZPMJQ4G4THP22MMT2/action/author_attestation","sign_citation":"https://pith.science/pith/YHT5ZVAK3ZPMJQ4G4THP22MMT2/action/citation_signature","submit_replication":"https://pith.science/pith/YHT5ZVAK3ZPMJQ4G4THP22MMT2/action/replication_record"}},"created_at":"2026-07-05T06:58:18.474029+00:00","updated_at":"2026-07-05T06:58:18.474029+00:00"}