{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2019:XDPNSXO6OMLN5ON5TN74VOEVFB","short_pith_number":"pith:XDPNSXO6","canonical_record":{"source":{"id":"1904.12617","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.IR","submitted_at":"2019-04-24T17:20:21Z","cross_cats_sorted":["cs.LG"],"title_canon_sha256":"5248b7ad2aa4f986f4eb00a4d7291afe63af0b316895b5bb6f3e42aaef5700d4","abstract_canon_sha256":"d3944fbad7c7600d83e8f12a8e82f68add7f5d49b6f5e8c75bd44184360a4d57"},"schema_version":"1.0"},"canonical_sha256":"b8ded95dde7316deb9bd9b7fcab89528755651ac3f90e02177d11ab683d55bbf","source":{"kind":"arxiv","id":"1904.12617","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1904.12617","created_at":"2026-05-17T23:47:34Z"},{"alias_kind":"arxiv_version","alias_value":"1904.12617v1","created_at":"2026-05-17T23:47:34Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1904.12617","created_at":"2026-05-17T23:47:34Z"},{"alias_kind":"pith_short_12","alias_value":"XDPNSXO6OMLN","created_at":"2026-05-18T12:33:33Z"},{"alias_kind":"pith_short_16","alias_value":"XDPNSXO6OMLN5ON5","created_at":"2026-05-18T12:33:33Z"},{"alias_kind":"pith_short_8","alias_value":"XDPNSXO6","created_at":"2026-05-18T12:33:33Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2019:XDPNSXO6OMLN5ON5TN74VOEVFB","target":"record","payload":{"canonical_record":{"source":{"id":"1904.12617","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.IR","submitted_at":"2019-04-24T17:20:21Z","cross_cats_sorted":["cs.LG"],"title_canon_sha256":"5248b7ad2aa4f986f4eb00a4d7291afe63af0b316895b5bb6f3e42aaef5700d4","abstract_canon_sha256":"d3944fbad7c7600d83e8f12a8e82f68add7f5d49b6f5e8c75bd44184360a4d57"},"schema_version":"1.0"},"canonical_sha256":"b8ded95dde7316deb9bd9b7fcab89528755651ac3f90e02177d11ab683d55bbf","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:47:34.598734Z","signature_b64":"lfuapukjWlvmKZQtTqovZxxws2GmLy+gnI67IGhZpmY6qjhousOW82T/4QrYvf5+AnVuSTryAQKz/Tb8AzS8Dg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"b8ded95dde7316deb9bd9b7fcab89528755651ac3f90e02177d11ab683d55bbf","last_reissued_at":"2026-05-17T23:47:34.598139Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:47:34.598139Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1904.12617","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:47:34Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"T8XR5yyptr5l5SLrOB0OYjG8BcqTqN5llNVpwUlLWMHc3JXZciHh+tOIlQ4DhaXi7u+UFUZdMxKARmCveBt7CA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-01T14:49:32.532687Z"},"content_sha256":"623fdb3fe883b7b02b67597c8230161bf2e759a2fbb4016b09a51f27590e8c43","schema_version":"1.0","event_id":"sha256:623fdb3fe883b7b02b67597c8230161bf2e759a2fbb4016b09a51f27590e8c43"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2019:XDPNSXO6OMLN5ON5TN74VOEVFB","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Using Machine Learning and Natural Language Processing to Review and Classify the Medical Literature on Cancer Susceptibility Genes","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.LG"],"primary_cat":"cs.IR","authors_text":"Cathy Wang, Danielle Braun, Francisco Acevedo, Giovanni Parmigiani, Heeyoon Kim, Kevin S Hughes, Nofal Ouardaoui, Regina Barzilay, Victor Diego Armengol, Yan Wang, Yujia Bao, Zhengyi Deng","submitted_at":"2019-04-24T17:20:21Z","abstract_excerpt":"PURPOSE: The medical literature relevant to germline genetics is growing exponentially. Clinicians need tools monitoring and prioritizing the literature to understand the clinical implications of the pathogenic genetic variants. We developed and evaluated two machine learning models to classify abstracts as relevant to the penetrance (risk of cancer for germline mutation carriers) or prevalence of germline genetic mutations. METHODS: We conducted literature searches in PubMed and retrieved paper titles and abstracts to create an annotated dataset for training and evaluating the two machine lea"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1904.12617","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:47:34Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"LK73EtRU258Zie0pf+RSekF3TNdqJGkZz1XEMEg/7JHKyMK5Oef9ELQrJ5y4s77xf35BrJzaiO70mpN99hd0DQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-01T14:49:32.533043Z"},"content_sha256":"605742419994b282ebcfcedc8a1d7a86edb750053e672c0fcf2d6ed3a96b0981","schema_version":"1.0","event_id":"sha256:605742419994b282ebcfcedc8a1d7a86edb750053e672c0fcf2d6ed3a96b0981"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/XDPNSXO6OMLN5ON5TN74VOEVFB/bundle.json","state_url":"https://pith.science/pith/XDPNSXO6OMLN5ON5TN74VOEVFB/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/XDPNSXO6OMLN5ON5TN74VOEVFB/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-01T14:49:32Z","links":{"resolver":"https://pith.science/pith/XDPNSXO6OMLN5ON5TN74VOEVFB","bundle":"https://pith.science/pith/XDPNSXO6OMLN5ON5TN74VOEVFB/bundle.json","state":"https://pith.science/pith/XDPNSXO6OMLN5ON5TN74VOEVFB/state.json","well_known_bundle":"https://pith.science/.well-known/pith/XDPNSXO6OMLN5ON5TN74VOEVFB/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2019:XDPNSXO6OMLN5ON5TN74VOEVFB","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"d3944fbad7c7600d83e8f12a8e82f68add7f5d49b6f5e8c75bd44184360a4d57","cross_cats_sorted":["cs.LG"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.IR","submitted_at":"2019-04-24T17:20:21Z","title_canon_sha256":"5248b7ad2aa4f986f4eb00a4d7291afe63af0b316895b5bb6f3e42aaef5700d4"},"schema_version":"1.0","source":{"id":"1904.12617","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1904.12617","created_at":"2026-05-17T23:47:34Z"},{"alias_kind":"arxiv_version","alias_value":"1904.12617v1","created_at":"2026-05-17T23:47:34Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1904.12617","created_at":"2026-05-17T23:47:34Z"},{"alias_kind":"pith_short_12","alias_value":"XDPNSXO6OMLN","created_at":"2026-05-18T12:33:33Z"},{"alias_kind":"pith_short_16","alias_value":"XDPNSXO6OMLN5ON5","created_at":"2026-05-18T12:33:33Z"},{"alias_kind":"pith_short_8","alias_value":"XDPNSXO6","created_at":"2026-05-18T12:33:33Z"}],"graph_snapshots":[{"event_id":"sha256:605742419994b282ebcfcedc8a1d7a86edb750053e672c0fcf2d6ed3a96b0981","target":"graph","created_at":"2026-05-17T23:47:34Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"PURPOSE: The medical literature relevant to germline genetics is growing exponentially. Clinicians need tools monitoring and prioritizing the literature to understand the clinical implications of the pathogenic genetic variants. We developed and evaluated two machine learning models to classify abstracts as relevant to the penetrance (risk of cancer for germline mutation carriers) or prevalence of germline genetic mutations. METHODS: We conducted literature searches in PubMed and retrieved paper titles and abstracts to create an annotated dataset for training and evaluating the two machine lea","authors_text":"Cathy Wang, Danielle Braun, Francisco Acevedo, Giovanni Parmigiani, Heeyoon Kim, Kevin S Hughes, Nofal Ouardaoui, Regina Barzilay, Victor Diego Armengol, Yan Wang, Yujia Bao, Zhengyi Deng","cross_cats":["cs.LG"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.IR","submitted_at":"2019-04-24T17:20:21Z","title":"Using Machine Learning and Natural Language Processing to Review and Classify the Medical Literature on Cancer Susceptibility Genes"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1904.12617","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:623fdb3fe883b7b02b67597c8230161bf2e759a2fbb4016b09a51f27590e8c43","target":"record","created_at":"2026-05-17T23:47:34Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"d3944fbad7c7600d83e8f12a8e82f68add7f5d49b6f5e8c75bd44184360a4d57","cross_cats_sorted":["cs.LG"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.IR","submitted_at":"2019-04-24T17:20:21Z","title_canon_sha256":"5248b7ad2aa4f986f4eb00a4d7291afe63af0b316895b5bb6f3e42aaef5700d4"},"schema_version":"1.0","source":{"id":"1904.12617","kind":"arxiv","version":1}},"canonical_sha256":"b8ded95dde7316deb9bd9b7fcab89528755651ac3f90e02177d11ab683d55bbf","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"b8ded95dde7316deb9bd9b7fcab89528755651ac3f90e02177d11ab683d55bbf","first_computed_at":"2026-05-17T23:47:34.598139Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:47:34.598139Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"lfuapukjWlvmKZQtTqovZxxws2GmLy+gnI67IGhZpmY6qjhousOW82T/4QrYvf5+AnVuSTryAQKz/Tb8AzS8Dg==","signature_status":"signed_v1","signed_at":"2026-05-17T23:47:34.598734Z","signed_message":"canonical_sha256_bytes"},"source_id":"1904.12617","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:623fdb3fe883b7b02b67597c8230161bf2e759a2fbb4016b09a51f27590e8c43","sha256:605742419994b282ebcfcedc8a1d7a86edb750053e672c0fcf2d6ed3a96b0981"],"state_sha256":"73d247711aff58a2111b8617ac048fd113ca66abea0d1f5607fde918a95c0ed8"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"OBRFGBiSSJKhdjfaCeS123tuNzl4V40CSMX9uOF7QMHQZFtV53tYTaKnuBzZz6/NpH7PVKPuSctWbve3RGCtDA==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-01T14:49:32.535061Z","bundle_sha256":"fefbbc7c215c46e6add08987ad096aeedc9fedd45055909f2cb9b096bc6ea59e"}}