{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2018:R4AYTVPBZQGTQOUS5SFEG5ZII7","short_pith_number":"pith:R4AYTVPB","schema_version":"1.0","canonical_sha256":"8f0189d5e1cc0d383a92ec8a43772847d1131b5388a4806c9aafc75a3e357882","source":{"kind":"arxiv","id":"1811.06524","version":1},"attestation_state":"computed","paper":{"title":"Exploiting Class Learnability in Noisy Data","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["stat.ML"],"primary_cat":"cs.LG","authors_text":"Eric Heim, James Hendler, Matthew Klawonn","submitted_at":"2018-11-15T18:42:30Z","abstract_excerpt":"In many domains, collecting sufficient labeled training data for supervised machine learning requires easily accessible but noisy sources, such as crowdsourcing services or tagged Web data. Noisy labels occur frequently in data sets harvested via these means, sometimes resulting in entire classes of data on which learned classifiers generalize poorly. For real world applications, we argue that it can be beneficial to avoid training on such classes entirely. In this work, we aim to explore the classes in a given data set, and guide supervised training to spend time on a class proportional to it"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1811.06524","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-11-15T18:42:30Z","cross_cats_sorted":["stat.ML"],"title_canon_sha256":"a123ee2c773d9586b91796ebae6f5f526999744462032bba1c15c585ef78f95d","abstract_canon_sha256":"3da2865ef701cc67c1b2ce5c82fbdd617a11c4a7da1dc5967640f285c0f73ca8"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:00:37.543007Z","signature_b64":"K9bkNN3GUus/1UsxcyxMtLpn5qm3wdWiaFWCfgFaT5dEGbWjHjHrMiECdxqiTdyEc48lxwkDfcmLUnTcBrGqDw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"8f0189d5e1cc0d383a92ec8a43772847d1131b5388a4806c9aafc75a3e357882","last_reissued_at":"2026-05-18T00:00:37.542558Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:00:37.542558Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Exploiting Class Learnability in Noisy Data","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["stat.ML"],"primary_cat":"cs.LG","authors_text":"Eric Heim, James Hendler, Matthew Klawonn","submitted_at":"2018-11-15T18:42:30Z","abstract_excerpt":"In many domains, collecting sufficient labeled training data for supervised machine learning requires easily accessible but noisy sources, such as crowdsourcing services or tagged Web data. Noisy labels occur frequently in data sets harvested via these means, sometimes resulting in entire classes of data on which learned classifiers generalize poorly. For real world applications, we argue that it can be beneficial to avoid training on such classes entirely. In this work, we aim to explore the classes in a given data set, and guide supervised training to spend time on a class proportional to it"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1811.06524","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1811.06524","created_at":"2026-05-18T00:00:37.542625+00:00"},{"alias_kind":"arxiv_version","alias_value":"1811.06524v1","created_at":"2026-05-18T00:00:37.542625+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1811.06524","created_at":"2026-05-18T00:00:37.542625+00:00"},{"alias_kind":"pith_short_12","alias_value":"R4AYTVPBZQGT","created_at":"2026-05-18T12:32:50.500415+00:00"},{"alias_kind":"pith_short_16","alias_value":"R4AYTVPBZQGTQOUS","created_at":"2026-05-18T12:32:50.500415+00:00"},{"alias_kind":"pith_short_8","alias_value":"R4AYTVPB","created_at":"2026-05-18T12:32:50.500415+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/R4AYTVPBZQGTQOUS5SFEG5ZII7","json":"https://pith.science/pith/R4AYTVPBZQGTQOUS5SFEG5ZII7.json","graph_json":"https://pith.science/api/pith-number/R4AYTVPBZQGTQOUS5SFEG5ZII7/graph.json","events_json":"https://pith.science/api/pith-number/R4AYTVPBZQGTQOUS5SFEG5ZII7/events.json","paper":"https://pith.science/paper/R4AYTVPB"},"agent_actions":{"view_html":"https://pith.science/pith/R4AYTVPBZQGTQOUS5SFEG5ZII7","download_json":"https://pith.science/pith/R4AYTVPBZQGTQOUS5SFEG5ZII7.json","view_paper":"https://pith.science/paper/R4AYTVPB","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1811.06524&json=true","fetch_graph":"https://pith.science/api/pith-number/R4AYTVPBZQGTQOUS5SFEG5ZII7/graph.json","fetch_events":"https://pith.science/api/pith-number/R4AYTVPBZQGTQOUS5SFEG5ZII7/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/R4AYTVPBZQGTQOUS5SFEG5ZII7/action/timestamp_anchor","attest_storage":"https://pith.science/pith/R4AYTVPBZQGTQOUS5SFEG5ZII7/action/storage_attestation","attest_author":"https://pith.science/pith/R4AYTVPBZQGTQOUS5SFEG5ZII7/action/author_attestation","sign_citation":"https://pith.science/pith/R4AYTVPBZQGTQOUS5SFEG5ZII7/action/citation_signature","submit_replication":"https://pith.science/pith/R4AYTVPBZQGTQOUS5SFEG5ZII7/action/replication_record"}},"created_at":"2026-05-18T00:00:37.542625+00:00","updated_at":"2026-05-18T00:00:37.542625+00:00"}