{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2019:XDPNSXO6OMLN5ON5TN74VOEVFB","short_pith_number":"pith:XDPNSXO6","schema_version":"1.0","canonical_sha256":"b8ded95dde7316deb9bd9b7fcab89528755651ac3f90e02177d11ab683d55bbf","source":{"kind":"arxiv","id":"1904.12617","version":1},"attestation_state":"computed","paper":{"title":"Using Machine Learning and Natural Language Processing to Review and Classify the Medical Literature on Cancer Susceptibility Genes","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.LG"],"primary_cat":"cs.IR","authors_text":"Cathy Wang, Danielle Braun, Francisco Acevedo, Giovanni Parmigiani, Heeyoon Kim, Kevin S Hughes, Nofal Ouardaoui, Regina Barzilay, Victor Diego Armengol, Yan Wang, Yujia Bao, Zhengyi Deng","submitted_at":"2019-04-24T17:20:21Z","abstract_excerpt":"PURPOSE: The medical literature relevant to germline genetics is growing exponentially. Clinicians need tools monitoring and prioritizing the literature to understand the clinical implications of the pathogenic genetic variants. We developed and evaluated two machine learning models to classify abstracts as relevant to the penetrance (risk of cancer for germline mutation carriers) or prevalence of germline genetic mutations. METHODS: We conducted literature searches in PubMed and retrieved paper titles and abstracts to create an annotated dataset for training and evaluating the two machine lea"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1904.12617","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.IR","submitted_at":"2019-04-24T17:20:21Z","cross_cats_sorted":["cs.LG"],"title_canon_sha256":"5248b7ad2aa4f986f4eb00a4d7291afe63af0b316895b5bb6f3e42aaef5700d4","abstract_canon_sha256":"d3944fbad7c7600d83e8f12a8e82f68add7f5d49b6f5e8c75bd44184360a4d57"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:47:34.598734Z","signature_b64":"lfuapukjWlvmKZQtTqovZxxws2GmLy+gnI67IGhZpmY6qjhousOW82T/4QrYvf5+AnVuSTryAQKz/Tb8AzS8Dg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"b8ded95dde7316deb9bd9b7fcab89528755651ac3f90e02177d11ab683d55bbf","last_reissued_at":"2026-05-17T23:47:34.598139Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:47:34.598139Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Using Machine Learning and Natural Language Processing to Review and Classify the Medical Literature on Cancer Susceptibility Genes","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.LG"],"primary_cat":"cs.IR","authors_text":"Cathy Wang, Danielle Braun, Francisco Acevedo, Giovanni Parmigiani, Heeyoon Kim, Kevin S Hughes, Nofal Ouardaoui, Regina Barzilay, Victor Diego Armengol, Yan Wang, Yujia Bao, Zhengyi Deng","submitted_at":"2019-04-24T17:20:21Z","abstract_excerpt":"PURPOSE: The medical literature relevant to germline genetics is growing exponentially. Clinicians need tools monitoring and prioritizing the literature to understand the clinical implications of the pathogenic genetic variants. We developed and evaluated two machine learning models to classify abstracts as relevant to the penetrance (risk of cancer for germline mutation carriers) or prevalence of germline genetic mutations. METHODS: We conducted literature searches in PubMed and retrieved paper titles and abstracts to create an annotated dataset for training and evaluating the two machine lea"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1904.12617","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1904.12617","created_at":"2026-05-17T23:47:34.598238+00:00"},{"alias_kind":"arxiv_version","alias_value":"1904.12617v1","created_at":"2026-05-17T23:47:34.598238+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1904.12617","created_at":"2026-05-17T23:47:34.598238+00:00"},{"alias_kind":"pith_short_12","alias_value":"XDPNSXO6OMLN","created_at":"2026-05-18T12:33:33.725879+00:00"},{"alias_kind":"pith_short_16","alias_value":"XDPNSXO6OMLN5ON5","created_at":"2026-05-18T12:33:33.725879+00:00"},{"alias_kind":"pith_short_8","alias_value":"XDPNSXO6","created_at":"2026-05-18T12:33:33.725879+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/XDPNSXO6OMLN5ON5TN74VOEVFB","json":"https://pith.science/pith/XDPNSXO6OMLN5ON5TN74VOEVFB.json","graph_json":"https://pith.science/api/pith-number/XDPNSXO6OMLN5ON5TN74VOEVFB/graph.json","events_json":"https://pith.science/api/pith-number/XDPNSXO6OMLN5ON5TN74VOEVFB/events.json","paper":"https://pith.science/paper/XDPNSXO6"},"agent_actions":{"view_html":"https://pith.science/pith/XDPNSXO6OMLN5ON5TN74VOEVFB","download_json":"https://pith.science/pith/XDPNSXO6OMLN5ON5TN74VOEVFB.json","view_paper":"https://pith.science/paper/XDPNSXO6","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1904.12617&json=true","fetch_graph":"https://pith.science/api/pith-number/XDPNSXO6OMLN5ON5TN74VOEVFB/graph.json","fetch_events":"https://pith.science/api/pith-number/XDPNSXO6OMLN5ON5TN74VOEVFB/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/XDPNSXO6OMLN5ON5TN74VOEVFB/action/timestamp_anchor","attest_storage":"https://pith.science/pith/XDPNSXO6OMLN5ON5TN74VOEVFB/action/storage_attestation","attest_author":"https://pith.science/pith/XDPNSXO6OMLN5ON5TN74VOEVFB/action/author_attestation","sign_citation":"https://pith.science/pith/XDPNSXO6OMLN5ON5TN74VOEVFB/action/citation_signature","submit_replication":"https://pith.science/pith/XDPNSXO6OMLN5ON5TN74VOEVFB/action/replication_record"}},"created_at":"2026-05-17T23:47:34.598238+00:00","updated_at":"2026-05-17T23:47:34.598238+00:00"}