{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2013:HWEXRCOY76PDCCF3URVFRQXIIP","short_pith_number":"pith:HWEXRCOY","schema_version":"1.0","canonical_sha256":"3d897889d8ff9e3108bba46a58c2e843eff2d26e972786b14abf9e3f8731cf11","source":{"kind":"arxiv","id":"1307.3673","version":1},"attestation_state":"computed","paper":{"title":"A Data Management Approach for Dataset Selection Using Human Computation","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.IR"],"primary_cat":"cs.LG","authors_text":"Alexandros Ntoulas, Omar Alonso, Vasilis Kandylas","submitted_at":"2013-07-13T19:29:33Z","abstract_excerpt":"As the number of applications that use machine learning algorithms increases, the need for labeled data useful for training such algorithms intensifies.\n  Getting labels typically involves employing humans to do the annotation, which directly translates to training and working costs. Crowdsourcing platforms have made labeling cheaper and faster, but they still involve significant costs, especially for the cases where the potential set of candidate data to be labeled is large. In this paper we describe a methodology and a prototype system aiming at addressing this challenge for Web-scale proble"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1307.3673","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2013-07-13T19:29:33Z","cross_cats_sorted":["cs.IR"],"title_canon_sha256":"1adbff6b9b7e0a5191ccee6fc86257148bd326847d331c6acb927578eee57f76","abstract_canon_sha256":"9a65334b210ff5dbf7dbc65c597ac33185fb8fc88d18a39a9bb3423cd9a1b0d0"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T03:18:28.249195Z","signature_b64":"aidDXw5RFQUPMlH+7JZdUgXMIZlNNgkshYQOemCMY3h301RV/llU4lHtHeofVt8cp3R1lBohDXDfv7rCyxVCCw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"3d897889d8ff9e3108bba46a58c2e843eff2d26e972786b14abf9e3f8731cf11","last_reissued_at":"2026-05-18T03:18:28.248710Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T03:18:28.248710Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"A Data Management Approach for Dataset Selection Using Human Computation","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.IR"],"primary_cat":"cs.LG","authors_text":"Alexandros Ntoulas, Omar Alonso, Vasilis Kandylas","submitted_at":"2013-07-13T19:29:33Z","abstract_excerpt":"As the number of applications that use machine learning algorithms increases, the need for labeled data useful for training such algorithms intensifies.\n  Getting labels typically involves employing humans to do the annotation, which directly translates to training and working costs. Crowdsourcing platforms have made labeling cheaper and faster, but they still involve significant costs, especially for the cases where the potential set of candidate data to be labeled is large. In this paper we describe a methodology and a prototype system aiming at addressing this challenge for Web-scale proble"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1307.3673","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1307.3673","created_at":"2026-05-18T03:18:28.248778+00:00"},{"alias_kind":"arxiv_version","alias_value":"1307.3673v1","created_at":"2026-05-18T03:18:28.248778+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1307.3673","created_at":"2026-05-18T03:18:28.248778+00:00"},{"alias_kind":"pith_short_12","alias_value":"HWEXRCOY76PD","created_at":"2026-05-18T12:27:46.883200+00:00"},{"alias_kind":"pith_short_16","alias_value":"HWEXRCOY76PDCCF3","created_at":"2026-05-18T12:27:46.883200+00:00"},{"alias_kind":"pith_short_8","alias_value":"HWEXRCOY","created_at":"2026-05-18T12:27:46.883200+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/HWEXRCOY76PDCCF3URVFRQXIIP","json":"https://pith.science/pith/HWEXRCOY76PDCCF3URVFRQXIIP.json","graph_json":"https://pith.science/api/pith-number/HWEXRCOY76PDCCF3URVFRQXIIP/graph.json","events_json":"https://pith.science/api/pith-number/HWEXRCOY76PDCCF3URVFRQXIIP/events.json","paper":"https://pith.science/paper/HWEXRCOY"},"agent_actions":{"view_html":"https://pith.science/pith/HWEXRCOY76PDCCF3URVFRQXIIP","download_json":"https://pith.science/pith/HWEXRCOY76PDCCF3URVFRQXIIP.json","view_paper":"https://pith.science/paper/HWEXRCOY","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1307.3673&json=true","fetch_graph":"https://pith.science/api/pith-number/HWEXRCOY76PDCCF3URVFRQXIIP/graph.json","fetch_events":"https://pith.science/api/pith-number/HWEXRCOY76PDCCF3URVFRQXIIP/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/HWEXRCOY76PDCCF3URVFRQXIIP/action/timestamp_anchor","attest_storage":"https://pith.science/pith/HWEXRCOY76PDCCF3URVFRQXIIP/action/storage_attestation","attest_author":"https://pith.science/pith/HWEXRCOY76PDCCF3URVFRQXIIP/action/author_attestation","sign_citation":"https://pith.science/pith/HWEXRCOY76PDCCF3URVFRQXIIP/action/citation_signature","submit_replication":"https://pith.science/pith/HWEXRCOY76PDCCF3URVFRQXIIP/action/replication_record"}},"created_at":"2026-05-18T03:18:28.248778+00:00","updated_at":"2026-05-18T03:18:28.248778+00:00"}