{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2019:RHRP56Q7PU5OINACPXLDJRWTOE","short_pith_number":"pith:RHRP56Q7","schema_version":"1.0","canonical_sha256":"89e2fefa1f7d3ae434027dd634c6d37132623f30892d0678503df0296af49434","source":{"kind":"arxiv","id":"1903.11101","version":1},"attestation_state":"computed","paper":{"title":"Cross-Modal Data Programming Enables Rapid Medical Machine Learning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["eess.IV","stat.ML"],"primary_cat":"cs.LG","authors_text":"Alexander Ratner, Christopher Lee-Messer, Christopher R\\'e, Daniel Rubin, Hersh Sagreiya, Jared Dunnmon, Khaled Saab, Matthew Lungren, Matthew Markert, Nishith Khandwala, Roger Goldman","submitted_at":"2019-03-26T18:12:34Z","abstract_excerpt":"Labeling training datasets has become a key barrier to building medical machine learning models. One strategy is to generate training labels programmatically, for example by applying natural language processing pipelines to text reports associated with imaging studies. We propose cross-modal data programming, which generalizes this intuitive strategy in a theoretically-grounded way that enables simpler, clinician-driven input, reduces required labeling time, and improves with additional unlabeled data. In this approach, clinicians generate training labels for models defined over a target modal"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1903.11101","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-03-26T18:12:34Z","cross_cats_sorted":["eess.IV","stat.ML"],"title_canon_sha256":"2d6638026942c684b7d4b7e48927ce331495f24acc3780cc68d2e0e56962c3c2","abstract_canon_sha256":"b996b9c7794326a05f3f5eeada36074f11a069f5ee23a0518507777ed3501f14"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:50:04.167236Z","signature_b64":"TR4VWGG2byrk4dPFOFqsFM14JODwNHGV7+mwmFDGG2S1Ov7pQGqjossEHxy91pEB/cA3uGld6mNjdEZfyEhtBQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"89e2fefa1f7d3ae434027dd634c6d37132623f30892d0678503df0296af49434","last_reissued_at":"2026-05-17T23:50:04.166575Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:50:04.166575Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Cross-Modal Data Programming Enables Rapid Medical Machine Learning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["eess.IV","stat.ML"],"primary_cat":"cs.LG","authors_text":"Alexander Ratner, Christopher Lee-Messer, Christopher R\\'e, Daniel Rubin, Hersh Sagreiya, Jared Dunnmon, Khaled Saab, Matthew Lungren, Matthew Markert, Nishith Khandwala, Roger Goldman","submitted_at":"2019-03-26T18:12:34Z","abstract_excerpt":"Labeling training datasets has become a key barrier to building medical machine learning models. One strategy is to generate training labels programmatically, for example by applying natural language processing pipelines to text reports associated with imaging studies. We propose cross-modal data programming, which generalizes this intuitive strategy in a theoretically-grounded way that enables simpler, clinician-driven input, reduces required labeling time, and improves with additional unlabeled data. In this approach, clinicians generate training labels for models defined over a target modal"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1903.11101","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1903.11101","created_at":"2026-05-17T23:50:04.166690+00:00"},{"alias_kind":"arxiv_version","alias_value":"1903.11101v1","created_at":"2026-05-17T23:50:04.166690+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1903.11101","created_at":"2026-05-17T23:50:04.166690+00:00"},{"alias_kind":"pith_short_12","alias_value":"RHRP56Q7PU5O","created_at":"2026-05-18T12:33:27.125529+00:00"},{"alias_kind":"pith_short_16","alias_value":"RHRP56Q7PU5OINAC","created_at":"2026-05-18T12:33:27.125529+00:00"},{"alias_kind":"pith_short_8","alias_value":"RHRP56Q7","created_at":"2026-05-18T12:33:27.125529+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/RHRP56Q7PU5OINACPXLDJRWTOE","json":"https://pith.science/pith/RHRP56Q7PU5OINACPXLDJRWTOE.json","graph_json":"https://pith.science/api/pith-number/RHRP56Q7PU5OINACPXLDJRWTOE/graph.json","events_json":"https://pith.science/api/pith-number/RHRP56Q7PU5OINACPXLDJRWTOE/events.json","paper":"https://pith.science/paper/RHRP56Q7"},"agent_actions":{"view_html":"https://pith.science/pith/RHRP56Q7PU5OINACPXLDJRWTOE","download_json":"https://pith.science/pith/RHRP56Q7PU5OINACPXLDJRWTOE.json","view_paper":"https://pith.science/paper/RHRP56Q7","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1903.11101&json=true","fetch_graph":"https://pith.science/api/pith-number/RHRP56Q7PU5OINACPXLDJRWTOE/graph.json","fetch_events":"https://pith.science/api/pith-number/RHRP56Q7PU5OINACPXLDJRWTOE/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/RHRP56Q7PU5OINACPXLDJRWTOE/action/timestamp_anchor","attest_storage":"https://pith.science/pith/RHRP56Q7PU5OINACPXLDJRWTOE/action/storage_attestation","attest_author":"https://pith.science/pith/RHRP56Q7PU5OINACPXLDJRWTOE/action/author_attestation","sign_citation":"https://pith.science/pith/RHRP56Q7PU5OINACPXLDJRWTOE/action/citation_signature","submit_replication":"https://pith.science/pith/RHRP56Q7PU5OINACPXLDJRWTOE/action/replication_record"}},"created_at":"2026-05-17T23:50:04.166690+00:00","updated_at":"2026-05-17T23:50:04.166690+00:00"}