{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2016:OR6VK3JPHRM4QTIIVNCQDFHY5L","short_pith_number":"pith:OR6VK3JP","schema_version":"1.0","canonical_sha256":"747d556d2f3c59c84d08ab450194f8eae44d5948388dbe8e37fc38e20bb38f56","source":{"kind":"arxiv","id":"1605.07723","version":3},"attestation_state":"computed","paper":{"title":"Data Programming: Creating Large Training Sets, Quickly","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","cs.LG"],"primary_cat":"stat.ML","authors_text":"Alexander Ratner, Christopher De Sa, Christopher R\\'e, Daniel Selsam, Sen Wu","submitted_at":"2016-05-25T04:14:59Z","abstract_excerpt":"Large labeled training sets are the critical building blocks of supervised learning methods and are key enablers of deep learning techniques. For some applications, creating labeled training sets is the most time-consuming and expensive part of applying machine learning. We therefore propose a paradigm for the programmatic creation of training sets called data programming in which users express weak supervision strategies or domain heuristics as labeling functions, which are programs that label subsets of the data, but that are noisy and may conflict. We show that by explicitly representing th"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1605.07723","kind":"arxiv","version":3},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"stat.ML","submitted_at":"2016-05-25T04:14:59Z","cross_cats_sorted":["cs.AI","cs.LG"],"title_canon_sha256":"04d2d0e96655a824f61fc3c40878d36648126a37f4192ebdffcb150e0aea1608","abstract_canon_sha256":"3082fbe4cbae221eb851765920b1a7ec21d90769744f9793d9fccf696dfa4dfe"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:58:54.856657Z","signature_b64":"FxWLB7x7vH3zIUtIUTLy5Qfz2Dd5RwMPwYSUWooYWgNy01fdNEqcnub9SHHwZOoBkqCYMZWNAMlyOjtsfAy1Aw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"747d556d2f3c59c84d08ab450194f8eae44d5948388dbe8e37fc38e20bb38f56","last_reissued_at":"2026-05-17T23:58:54.856265Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:58:54.856265Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Data Programming: Creating Large Training Sets, Quickly","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","cs.LG"],"primary_cat":"stat.ML","authors_text":"Alexander Ratner, Christopher De Sa, Christopher R\\'e, Daniel Selsam, Sen Wu","submitted_at":"2016-05-25T04:14:59Z","abstract_excerpt":"Large labeled training sets are the critical building blocks of supervised learning methods and are key enablers of deep learning techniques. For some applications, creating labeled training sets is the most time-consuming and expensive part of applying machine learning. We therefore propose a paradigm for the programmatic creation of training sets called data programming in which users express weak supervision strategies or domain heuristics as labeling functions, which are programs that label subsets of the data, but that are noisy and may conflict. We show that by explicitly representing th"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1605.07723","kind":"arxiv","version":3},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1605.07723","created_at":"2026-05-17T23:58:54.856324+00:00"},{"alias_kind":"arxiv_version","alias_value":"1605.07723v3","created_at":"2026-05-17T23:58:54.856324+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1605.07723","created_at":"2026-05-17T23:58:54.856324+00:00"},{"alias_kind":"pith_short_12","alias_value":"OR6VK3JPHRM4","created_at":"2026-05-18T12:30:36.002864+00:00"},{"alias_kind":"pith_short_16","alias_value":"OR6VK3JPHRM4QTII","created_at":"2026-05-18T12:30:36.002864+00:00"},{"alias_kind":"pith_short_8","alias_value":"OR6VK3JP","created_at":"2026-05-18T12:30:36.002864+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":1,"internal_anchor_count":1,"sample":[{"citing_arxiv_id":"2512.04316","citing_title":"ConsentDiff at Scale: Longitudinal Audits of Web Privacy Policy Changes and UI Frictions","ref_index":22,"is_internal_anchor":true}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/OR6VK3JPHRM4QTIIVNCQDFHY5L","json":"https://pith.science/pith/OR6VK3JPHRM4QTIIVNCQDFHY5L.json","graph_json":"https://pith.science/api/pith-number/OR6VK3JPHRM4QTIIVNCQDFHY5L/graph.json","events_json":"https://pith.science/api/pith-number/OR6VK3JPHRM4QTIIVNCQDFHY5L/events.json","paper":"https://pith.science/paper/OR6VK3JP"},"agent_actions":{"view_html":"https://pith.science/pith/OR6VK3JPHRM4QTIIVNCQDFHY5L","download_json":"https://pith.science/pith/OR6VK3JPHRM4QTIIVNCQDFHY5L.json","view_paper":"https://pith.science/paper/OR6VK3JP","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1605.07723&json=true","fetch_graph":"https://pith.science/api/pith-number/OR6VK3JPHRM4QTIIVNCQDFHY5L/graph.json","fetch_events":"https://pith.science/api/pith-number/OR6VK3JPHRM4QTIIVNCQDFHY5L/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/OR6VK3JPHRM4QTIIVNCQDFHY5L/action/timestamp_anchor","attest_storage":"https://pith.science/pith/OR6VK3JPHRM4QTIIVNCQDFHY5L/action/storage_attestation","attest_author":"https://pith.science/pith/OR6VK3JPHRM4QTIIVNCQDFHY5L/action/author_attestation","sign_citation":"https://pith.science/pith/OR6VK3JPHRM4QTIIVNCQDFHY5L/action/citation_signature","submit_replication":"https://pith.science/pith/OR6VK3JPHRM4QTIIVNCQDFHY5L/action/replication_record"}},"created_at":"2026-05-17T23:58:54.856324+00:00","updated_at":"2026-05-17T23:58:54.856324+00:00"}