{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2020:UDJLH23DZQU4SJBNTB5AIIFYNT","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"13f74f051e08a711e99b05b3a1c7021b6fb3622e24d52441ec72c042ff4d6b4f","cross_cats_sorted":["cs.AI","cs.LG"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2020-09-29T11:46:07Z","title_canon_sha256":"e2d289671749d229d60550678d420f854c12197fc3bcf7764166fa90daef82cd"},"schema_version":"1.0","source":{"id":"2010.02005","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2010.02005","created_at":"2026-07-05T01:40:11Z"},{"alias_kind":"arxiv_version","alias_value":"2010.02005v1","created_at":"2026-07-05T01:40:11Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2010.02005","created_at":"2026-07-05T01:40:11Z"},{"alias_kind":"pith_short_12","alias_value":"UDJLH23DZQU4","created_at":"2026-07-05T01:40:11Z"},{"alias_kind":"pith_short_16","alias_value":"UDJLH23DZQU4SJBN","created_at":"2026-07-05T01:40:11Z"},{"alias_kind":"pith_short_8","alias_value":"UDJLH23D","created_at":"2026-07-05T01:40:11Z"}],"graph_snapshots":[{"event_id":"sha256:e6645b37b08af67b096ddf1838494fb5e81fc7997405b893f327bbca443bb979","target":"graph","created_at":"2026-07-05T01:40:11Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2010.02005/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Transformer models have shown impressive performance on a variety of NLP tasks. Off-the-shelf, pre-trained models can be fine-tuned for specific NLP classification tasks, reducing the need for large amounts of additional training data. However, little research has addressed how much data is required to accurately fine-tune such pre-trained transformer models, and how much data is needed for accurate prediction. This paper explores the usability of BERT (a Transformer model for word embedding) for gender prediction on social media. Forensic applications include detecting gender obfuscation, e.g","authors_text":"Maaike Burghoorn, Maaike H.T. de Boer, Stephan Raaijmakers","cross_cats":["cs.AI","cs.LG"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2020-09-29T11:46:07Z","title":"Gender prediction using limited Twitter Data"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2010.02005","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:75b7788db3213384f3530f8ae0b0ca2c0b1991abdbcb1ae67b3a84cbfc9ae666","target":"record","created_at":"2026-07-05T01:40:11Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"13f74f051e08a711e99b05b3a1c7021b6fb3622e24d52441ec72c042ff4d6b4f","cross_cats_sorted":["cs.AI","cs.LG"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2020-09-29T11:46:07Z","title_canon_sha256":"e2d289671749d229d60550678d420f854c12197fc3bcf7764166fa90daef82cd"},"schema_version":"1.0","source":{"id":"2010.02005","kind":"arxiv","version":1}},"canonical_sha256":"a0d2b3eb63cc29c9242d987a0420b86cd723f443e216f92100171334a4fae99a","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"a0d2b3eb63cc29c9242d987a0420b86cd723f443e216f92100171334a4fae99a","first_computed_at":"2026-07-05T01:40:11.478194Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-07-05T01:40:11.478194Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"Z3YyQ81TFz8tmbetmKZ1130MKfWAuEehv780e+0v8ck8J3UbjoS9Gnipa1QxLhNgXlFcuJO7cDFiYQgS005OCg==","signature_status":"signed_v1","signed_at":"2026-07-05T01:40:11.478548Z","signed_message":"canonical_sha256_bytes"},"source_id":"2010.02005","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:75b7788db3213384f3530f8ae0b0ca2c0b1991abdbcb1ae67b3a84cbfc9ae666","sha256:e6645b37b08af67b096ddf1838494fb5e81fc7997405b893f327bbca443bb979"],"state_sha256":"d175802cb6e81b2852d55a73581bef6f2b069154288e113d3c3c236862d09ae9"}