{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2018:UTITDBCIZOOG6ELUTUPTGBCI6T","short_pith_number":"pith:UTITDBCI","canonical_record":{"source":{"id":"1811.01910","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2018-11-05T18:39:54Z","cross_cats_sorted":["cs.AI","cs.LG","cs.NE"],"title_canon_sha256":"73daee9b537a8f0ca9fc155da528597a716c79cd8d848cfc5c5b6ca945cc4846","abstract_canon_sha256":"0fc69870ebadafac139941706fa495d009a5e683b206c78ee8240e15bf3f8df5"},"schema_version":"1.0"},"canonical_sha256":"a4d1318448cb9c6f11749d1f330448f4cd1c36fa3a4f4792edf0992c5fe9cf22","source":{"kind":"arxiv","id":"1811.01910","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1811.01910","created_at":"2026-05-17T23:58:52Z"},{"alias_kind":"arxiv_version","alias_value":"1811.01910v2","created_at":"2026-05-17T23:58:52Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1811.01910","created_at":"2026-05-17T23:58:52Z"},{"alias_kind":"pith_short_12","alias_value":"UTITDBCIZOOG","created_at":"2026-05-18T12:32:56Z"},{"alias_kind":"pith_short_16","alias_value":"UTITDBCIZOOG6ELU","created_at":"2026-05-18T12:32:56Z"},{"alias_kind":"pith_short_8","alias_value":"UTITDBCI","created_at":"2026-05-18T12:32:56Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2018:UTITDBCIZOOG6ELUTUPTGBCI6T","target":"record","payload":{"canonical_record":{"source":{"id":"1811.01910","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2018-11-05T18:39:54Z","cross_cats_sorted":["cs.AI","cs.LG","cs.NE"],"title_canon_sha256":"73daee9b537a8f0ca9fc155da528597a716c79cd8d848cfc5c5b6ca945cc4846","abstract_canon_sha256":"0fc69870ebadafac139941706fa495d009a5e683b206c78ee8240e15bf3f8df5"},"schema_version":"1.0"},"canonical_sha256":"a4d1318448cb9c6f11749d1f330448f4cd1c36fa3a4f4792edf0992c5fe9cf22","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:58:52.395157Z","signature_b64":"+qi585RR9LpM0X7MJwMXBdVdT2QgA9ECxhP0x5FAePOIrtAi5bMAv9TrlJfRm08cDC1Y5Ean5EO0hA0Vp3b5Bg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"a4d1318448cb9c6f11749d1f330448f4cd1c36fa3a4f4792edf0992c5fe9cf22","last_reissued_at":"2026-05-17T23:58:52.394655Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:58:52.394655Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1811.01910","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:58:52Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"7eZP1RDO0H6OEQD5gPh3IG6BJibjJF1r6/RBGwSme0o8yxctRVIv5o5/THJCTdEc6f2DGSrES6P/D5ffO6i8Aw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-05T10:41:14.587634Z"},"content_sha256":"3b7e5500e70dd8ccd29ba5e710bc4c2d4443edfc8aa0302fe8b81f92e7ff1cdf","schema_version":"1.0","event_id":"sha256:3b7e5500e70dd8ccd29ba5e710bc4c2d4443edfc8aa0302fe8b81f92e7ff1cdf"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2018:UTITDBCIZOOG6ELUTUPTGBCI6T","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Evolutionary Data Measures: Understanding the Difficulty of Text Classification Tasks","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.AI","cs.LG","cs.NE"],"primary_cat":"cs.CL","authors_text":"Bingbing Zhang, Edward Collins, Nikolai Rozanov","submitted_at":"2018-11-05T18:39:54Z","abstract_excerpt":"Classification tasks are usually analysed and improved through new model architectures or hyperparameter optimisation but the underlying properties of datasets are discovered on an ad-hoc basis as errors occur. However, understanding the properties of the data is crucial in perfecting models. In this paper we analyse exactly which characteristics of a dataset best determine how difficult that dataset is for the task of text classification. We then propose an intuitive measure of difficulty for text classification datasets which is simple and fast to calculate. We show that this measure general"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1811.01910","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:58:52Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"5IKGsAG5dxQVIy7UzcoRGPQKy8vSbbNuSjd9/DGAAoeFvxwwf7XP/GLRiTzsQYNQAx2rCZSS4Ajl0D8gsnFcAA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-05T10:41:14.587984Z"},"content_sha256":"bb3df678863b2f7c3331206c756df5ac0e24f167806f9a25b3a8a08ed4d79829","schema_version":"1.0","event_id":"sha256:bb3df678863b2f7c3331206c756df5ac0e24f167806f9a25b3a8a08ed4d79829"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/UTITDBCIZOOG6ELUTUPTGBCI6T/bundle.json","state_url":"https://pith.science/pith/UTITDBCIZOOG6ELUTUPTGBCI6T/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/UTITDBCIZOOG6ELUTUPTGBCI6T/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-05T10:41:14Z","links":{"resolver":"https://pith.science/pith/UTITDBCIZOOG6ELUTUPTGBCI6T","bundle":"https://pith.science/pith/UTITDBCIZOOG6ELUTUPTGBCI6T/bundle.json","state":"https://pith.science/pith/UTITDBCIZOOG6ELUTUPTGBCI6T/state.json","well_known_bundle":"https://pith.science/.well-known/pith/UTITDBCIZOOG6ELUTUPTGBCI6T/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2018:UTITDBCIZOOG6ELUTUPTGBCI6T","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"0fc69870ebadafac139941706fa495d009a5e683b206c78ee8240e15bf3f8df5","cross_cats_sorted":["cs.AI","cs.LG","cs.NE"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2018-11-05T18:39:54Z","title_canon_sha256":"73daee9b537a8f0ca9fc155da528597a716c79cd8d848cfc5c5b6ca945cc4846"},"schema_version":"1.0","source":{"id":"1811.01910","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1811.01910","created_at":"2026-05-17T23:58:52Z"},{"alias_kind":"arxiv_version","alias_value":"1811.01910v2","created_at":"2026-05-17T23:58:52Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1811.01910","created_at":"2026-05-17T23:58:52Z"},{"alias_kind":"pith_short_12","alias_value":"UTITDBCIZOOG","created_at":"2026-05-18T12:32:56Z"},{"alias_kind":"pith_short_16","alias_value":"UTITDBCIZOOG6ELU","created_at":"2026-05-18T12:32:56Z"},{"alias_kind":"pith_short_8","alias_value":"UTITDBCI","created_at":"2026-05-18T12:32:56Z"}],"graph_snapshots":[{"event_id":"sha256:bb3df678863b2f7c3331206c756df5ac0e24f167806f9a25b3a8a08ed4d79829","target":"graph","created_at":"2026-05-17T23:58:52Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Classification tasks are usually analysed and improved through new model architectures or hyperparameter optimisation but the underlying properties of datasets are discovered on an ad-hoc basis as errors occur. However, understanding the properties of the data is crucial in perfecting models. In this paper we analyse exactly which characteristics of a dataset best determine how difficult that dataset is for the task of text classification. We then propose an intuitive measure of difficulty for text classification datasets which is simple and fast to calculate. We show that this measure general","authors_text":"Bingbing Zhang, Edward Collins, Nikolai Rozanov","cross_cats":["cs.AI","cs.LG","cs.NE"],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2018-11-05T18:39:54Z","title":"Evolutionary Data Measures: Understanding the Difficulty of Text Classification Tasks"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1811.01910","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:3b7e5500e70dd8ccd29ba5e710bc4c2d4443edfc8aa0302fe8b81f92e7ff1cdf","target":"record","created_at":"2026-05-17T23:58:52Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"0fc69870ebadafac139941706fa495d009a5e683b206c78ee8240e15bf3f8df5","cross_cats_sorted":["cs.AI","cs.LG","cs.NE"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2018-11-05T18:39:54Z","title_canon_sha256":"73daee9b537a8f0ca9fc155da528597a716c79cd8d848cfc5c5b6ca945cc4846"},"schema_version":"1.0","source":{"id":"1811.01910","kind":"arxiv","version":2}},"canonical_sha256":"a4d1318448cb9c6f11749d1f330448f4cd1c36fa3a4f4792edf0992c5fe9cf22","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"a4d1318448cb9c6f11749d1f330448f4cd1c36fa3a4f4792edf0992c5fe9cf22","first_computed_at":"2026-05-17T23:58:52.394655Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:58:52.394655Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"+qi585RR9LpM0X7MJwMXBdVdT2QgA9ECxhP0x5FAePOIrtAi5bMAv9TrlJfRm08cDC1Y5Ean5EO0hA0Vp3b5Bg==","signature_status":"signed_v1","signed_at":"2026-05-17T23:58:52.395157Z","signed_message":"canonical_sha256_bytes"},"source_id":"1811.01910","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:3b7e5500e70dd8ccd29ba5e710bc4c2d4443edfc8aa0302fe8b81f92e7ff1cdf","sha256:bb3df678863b2f7c3331206c756df5ac0e24f167806f9a25b3a8a08ed4d79829"],"state_sha256":"5d5d5164f928695d1c92cb66643387650d26ad57be429764cc45af7cfd6c0c7f"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"J0KV751ayxay3GG4h+kknCezpQCYIlsxe16gmVOv4NMrXvhyIftAU4r095Hx3doNlrFAvaZTBDOKgO9fj0uhCA==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-05T10:41:14.589960Z","bundle_sha256":"28cdab6bd33d2e0e1087ff3fdab53ede37e1bf301f16c429103fd4e72c274d67"}}