{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2012:HA6UWNQKOIJKSBSD675UYNBYO3","short_pith_number":"pith:HA6UWNQK","schema_version":"1.0","canonical_sha256":"383d4b360a7212a90643f7fb4c343876c56c39df533fef8f6145c2f5b509e68f","source":{"kind":"arxiv","id":"1212.0960","version":1},"attestation_state":"computed","paper":{"title":"Evaluating Classifiers Without Expert Labels","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.IR","stat.ML"],"primary_cat":"cs.LG","authors_text":"Hyun Joon Jung, Matthew Lease","submitted_at":"2012-12-05T08:15:36Z","abstract_excerpt":"This paper considers the challenge of evaluating a set of classifiers, as done in shared task evaluations like the KDD Cup or NIST TREC, without expert labels. While expert labels provide the traditional cornerstone for evaluating statistical learners, limited or expensive access to experts represents a practical bottleneck. Instead, we seek methodology for estimating performance of the classifiers which is more scalable than expert labeling yet preserves high correlation with evaluation based on expert labels. We consider both: 1) using only labels automatically generated by the classifiers ("},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1212.0960","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2012-12-05T08:15:36Z","cross_cats_sorted":["cs.IR","stat.ML"],"title_canon_sha256":"231671e6056569dafb0fc7bcca7c0ca7e2c250644095c93f70674d4b4f588c32","abstract_canon_sha256":"c88663130f251db0c7a14bd0c81b0bbdf80255353114cf198258e5db0563fb51"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T03:39:06.838155Z","signature_b64":"QpWsuQPFokbbqpaEcl96u9124Q3Fk9z8PBQ981qhFQTN52Y9bY8owbW9ueMTNEryyZOpCn+EuGmxkWvvj5o0Cw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"383d4b360a7212a90643f7fb4c343876c56c39df533fef8f6145c2f5b509e68f","last_reissued_at":"2026-05-18T03:39:06.837478Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T03:39:06.837478Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Evaluating Classifiers Without Expert Labels","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.IR","stat.ML"],"primary_cat":"cs.LG","authors_text":"Hyun Joon Jung, Matthew Lease","submitted_at":"2012-12-05T08:15:36Z","abstract_excerpt":"This paper considers the challenge of evaluating a set of classifiers, as done in shared task evaluations like the KDD Cup or NIST TREC, without expert labels. While expert labels provide the traditional cornerstone for evaluating statistical learners, limited or expensive access to experts represents a practical bottleneck. Instead, we seek methodology for estimating performance of the classifiers which is more scalable than expert labeling yet preserves high correlation with evaluation based on expert labels. We consider both: 1) using only labels automatically generated by the classifiers ("},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1212.0960","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1212.0960","created_at":"2026-05-18T03:39:06.837575+00:00"},{"alias_kind":"arxiv_version","alias_value":"1212.0960v1","created_at":"2026-05-18T03:39:06.837575+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1212.0960","created_at":"2026-05-18T03:39:06.837575+00:00"},{"alias_kind":"pith_short_12","alias_value":"HA6UWNQKOIJK","created_at":"2026-05-18T12:27:09.501522+00:00"},{"alias_kind":"pith_short_16","alias_value":"HA6UWNQKOIJKSBSD","created_at":"2026-05-18T12:27:09.501522+00:00"},{"alias_kind":"pith_short_8","alias_value":"HA6UWNQK","created_at":"2026-05-18T12:27:09.501522+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":4,"internal_anchor_count":1,"sample":[{"citing_arxiv_id":"2604.24824","citing_title":"Negative Ontology of True Target for Machine Learning: Towards Evaluation and Learning under Democratic Supervision","ref_index":58,"is_internal_anchor":true},{"citing_arxiv_id":"2604.24824","citing_title":"Negative Ontology of True Target for Machine Learning: Towards Evaluation and Learning under Democratic Supervision","ref_index":58,"is_internal_anchor":false},{"citing_arxiv_id":"2604.20944","citing_title":"LAF-Based Evaluation and UTTL-Based Learning Strategies with MIATTs","ref_index":23,"is_internal_anchor":false},{"citing_arxiv_id":"2604.24824","citing_title":"Negative Ontology of True Target for Machine Learning: Towards Evaluation and Learning under Democratic Supervision","ref_index":58,"is_internal_anchor":false}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/HA6UWNQKOIJKSBSD675UYNBYO3","json":"https://pith.science/pith/HA6UWNQKOIJKSBSD675UYNBYO3.json","graph_json":"https://pith.science/api/pith-number/HA6UWNQKOIJKSBSD675UYNBYO3/graph.json","events_json":"https://pith.science/api/pith-number/HA6UWNQKOIJKSBSD675UYNBYO3/events.json","paper":"https://pith.science/paper/HA6UWNQK"},"agent_actions":{"view_html":"https://pith.science/pith/HA6UWNQKOIJKSBSD675UYNBYO3","download_json":"https://pith.science/pith/HA6UWNQKOIJKSBSD675UYNBYO3.json","view_paper":"https://pith.science/paper/HA6UWNQK","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1212.0960&json=true","fetch_graph":"https://pith.science/api/pith-number/HA6UWNQKOIJKSBSD675UYNBYO3/graph.json","fetch_events":"https://pith.science/api/pith-number/HA6UWNQKOIJKSBSD675UYNBYO3/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/HA6UWNQKOIJKSBSD675UYNBYO3/action/timestamp_anchor","attest_storage":"https://pith.science/pith/HA6UWNQKOIJKSBSD675UYNBYO3/action/storage_attestation","attest_author":"https://pith.science/pith/HA6UWNQKOIJKSBSD675UYNBYO3/action/author_attestation","sign_citation":"https://pith.science/pith/HA6UWNQKOIJKSBSD675UYNBYO3/action/citation_signature","submit_replication":"https://pith.science/pith/HA6UWNQKOIJKSBSD675UYNBYO3/action/replication_record"}},"created_at":"2026-05-18T03:39:06.837575+00:00","updated_at":"2026-05-18T03:39:06.837575+00:00"}