{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2018:D3UN5SZWGWP7MKVGTA6PJUTVMW","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"874dad9cdd0775b3081e3f3dfbec56d6d6b48afce715c2b381fc61e7f99455dd","cross_cats_sorted":["cs.AI","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2018-06-16T21:46:59Z","title_canon_sha256":"a49bac36a09a4d6d383942e979237e3f2c7a38143f963b3404fc75216eb0a410"},"schema_version":"1.0","source":{"id":"1806.06301","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1806.06301","created_at":"2026-05-18T00:13:02Z"},{"alias_kind":"arxiv_version","alias_value":"1806.06301v1","created_at":"2026-05-18T00:13:02Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1806.06301","created_at":"2026-05-18T00:13:02Z"},{"alias_kind":"pith_short_12","alias_value":"D3UN5SZWGWP7","created_at":"2026-05-18T12:32:19Z"},{"alias_kind":"pith_short_16","alias_value":"D3UN5SZWGWP7MKVG","created_at":"2026-05-18T12:32:19Z"},{"alias_kind":"pith_short_8","alias_value":"D3UN5SZW","created_at":"2026-05-18T12:32:19Z"}],"graph_snapshots":[{"event_id":"sha256:8497ef0075cce6eb5ee05e18537466f08078835e34688c9b203e36b6d25eeb5f","target":"graph","created_at":"2026-05-18T00:13:02Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Many modern Artificial Intelligence (AI) systems make use of data embeddings, particularly in the domain of Natural Language Processing (NLP). These embeddings are learnt from data that has been gathered \"from the wild\" and have been found to contain unwanted biases. In this paper we make three contributions towards measuring, understanding and removing this problem. We present a rigorous way to measure some of these biases, based on the use of word lists created for social psychology applications; we observe how gender bias in occupations reflects actual gender bias in the same occupations in","authors_text":"Adam Sutton, Nello Cristianini, Thomas Lansdall-Welfare","cross_cats":["cs.AI","stat.ML"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2018-06-16T21:46:59Z","title":"Biased Embeddings from Wild Data: Measuring, Understanding and Removing"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1806.06301","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:6e9a31d2f2d96183d993303baab2c14eac130e2c2353e82f0e2441b939196e42","target":"record","created_at":"2026-05-18T00:13:02Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"874dad9cdd0775b3081e3f3dfbec56d6d6b48afce715c2b381fc61e7f99455dd","cross_cats_sorted":["cs.AI","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2018-06-16T21:46:59Z","title_canon_sha256":"a49bac36a09a4d6d383942e979237e3f2c7a38143f963b3404fc75216eb0a410"},"schema_version":"1.0","source":{"id":"1806.06301","kind":"arxiv","version":1}},"canonical_sha256":"1ee8decb36359ff62aa6983cf4d27565825461ebea3374ffdb2ce6146ddc06d4","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"1ee8decb36359ff62aa6983cf4d27565825461ebea3374ffdb2ce6146ddc06d4","first_computed_at":"2026-05-18T00:13:02.938778Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:13:02.938778Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"oRjlIog64lppqp6fB5jaLQAjMbUGUiCR7ZhIcx6+ni41cnjPIL8XEVa4c8D8yKgkmEIx4u6x82C2OTmNOWyMDA==","signature_status":"signed_v1","signed_at":"2026-05-18T00:13:02.939515Z","signed_message":"canonical_sha256_bytes"},"source_id":"1806.06301","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:6e9a31d2f2d96183d993303baab2c14eac130e2c2353e82f0e2441b939196e42","sha256:8497ef0075cce6eb5ee05e18537466f08078835e34688c9b203e36b6d25eeb5f"],"state_sha256":"78fdd631348016a63930c982daab702c730c885aebed15f5fdcb645f5aab674d"}