{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2018:J6BDD2PQ72L4QGHIK56PFBRWPD","short_pith_number":"pith:J6BDD2PQ","canonical_record":{"source":{"id":"1811.00070","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2018-10-31T19:06:50Z","cross_cats_sorted":[],"title_canon_sha256":"1a353cc438792989b6db46723b7dddac2fdffe075d338b099499a5a6a778b203","abstract_canon_sha256":"b9d714960eb96c51f6a95cd9c993e5904e8d01487be95772dcd36453f8d5101f"},"schema_version":"1.0"},"canonical_sha256":"4f8231e9f0fe97c818e8577cf2863678cac8b86e0a9000cfe343457e764872a1","source":{"kind":"arxiv","id":"1811.00070","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1811.00070","created_at":"2026-05-17T23:49:17Z"},{"alias_kind":"arxiv_version","alias_value":"1811.00070v2","created_at":"2026-05-17T23:49:17Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1811.00070","created_at":"2026-05-17T23:49:17Z"},{"alias_kind":"pith_short_12","alias_value":"J6BDD2PQ72L4","created_at":"2026-05-18T12:32:31Z"},{"alias_kind":"pith_short_16","alias_value":"J6BDD2PQ72L4QGHI","created_at":"2026-05-18T12:32:31Z"},{"alias_kind":"pith_short_8","alias_value":"J6BDD2PQ","created_at":"2026-05-18T12:32:31Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2018:J6BDD2PQ72L4QGHIK56PFBRWPD","target":"record","payload":{"canonical_record":{"source":{"id":"1811.00070","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2018-10-31T19:06:50Z","cross_cats_sorted":[],"title_canon_sha256":"1a353cc438792989b6db46723b7dddac2fdffe075d338b099499a5a6a778b203","abstract_canon_sha256":"b9d714960eb96c51f6a95cd9c993e5904e8d01487be95772dcd36453f8d5101f"},"schema_version":"1.0"},"canonical_sha256":"4f8231e9f0fe97c818e8577cf2863678cac8b86e0a9000cfe343457e764872a1","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:49:17.197207Z","signature_b64":"ZvHU/S1faMCZRJs+PtpjfIwNMQ2cJsvSdxhIgEWHQv+Gp77RQikJKntZJ/ZGCCFluVLftlhu58MHMVEZxZhMAA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"4f8231e9f0fe97c818e8577cf2863678cac8b86e0a9000cfe343457e764872a1","last_reissued_at":"2026-05-17T23:49:17.196566Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:49:17.196566Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1811.00070","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:49:17Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"DW5LZeVraq0zE4jGpv30Vxf6kIu+bkLIgQzGYrJM+GUKAabU2jN2nrjYA8cWM5G7H9N4OV0P2gYxT8l+imHgCQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-05T01:22:08.028757Z"},"content_sha256":"113f919ae120a4f11c98a009f45b35fd8e6436db0394f1aa3c73b8d0f63d2826","schema_version":"1.0","event_id":"sha256:113f919ae120a4f11c98a009f45b35fd8e6436db0394f1aa3c73b8d0f63d2826"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2018:J6BDD2PQ72L4QGHIK56PFBRWPD","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Effective Feature Representation for Clinical Text Concept Extraction","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Bruno Godefroy, Christopher Potts, Guillaume Genthial, Yifeng Tao","submitted_at":"2018-10-31T19:06:50Z","abstract_excerpt":"Crucial information about the practice of healthcare is recorded only in free-form text, which creates an enormous opportunity for high-impact NLP. However, annotated healthcare datasets tend to be small and expensive to obtain, which raises the question of how to make maximally efficient uses of the available data. To this end, we develop an LSTM-CRF model for combining unsupervised word representations and hand-built feature representations derived from publicly available healthcare ontologies. We show that this combined model yields superior performance on five datasets of diverse kinds of "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1811.00070","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:49:17Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"F2DXV9ohqjX+ZS54wty3+ngU4m9j3lcxIzWFkMONL1kYttDElhj5pnzEyK6YFoc/6d+yQmLvEmdOv101XJmSAg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-05T01:22:08.029427Z"},"content_sha256":"77e65ed8d428bf135c1327fb389dd5d334e29d8596099aaba7779980e682c328","schema_version":"1.0","event_id":"sha256:77e65ed8d428bf135c1327fb389dd5d334e29d8596099aaba7779980e682c328"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/J6BDD2PQ72L4QGHIK56PFBRWPD/bundle.json","state_url":"https://pith.science/pith/J6BDD2PQ72L4QGHIK56PFBRWPD/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/J6BDD2PQ72L4QGHIK56PFBRWPD/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-05T01:22:08Z","links":{"resolver":"https://pith.science/pith/J6BDD2PQ72L4QGHIK56PFBRWPD","bundle":"https://pith.science/pith/J6BDD2PQ72L4QGHIK56PFBRWPD/bundle.json","state":"https://pith.science/pith/J6BDD2PQ72L4QGHIK56PFBRWPD/state.json","well_known_bundle":"https://pith.science/.well-known/pith/J6BDD2PQ72L4QGHIK56PFBRWPD/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2018:J6BDD2PQ72L4QGHIK56PFBRWPD","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"b9d714960eb96c51f6a95cd9c993e5904e8d01487be95772dcd36453f8d5101f","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2018-10-31T19:06:50Z","title_canon_sha256":"1a353cc438792989b6db46723b7dddac2fdffe075d338b099499a5a6a778b203"},"schema_version":"1.0","source":{"id":"1811.00070","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1811.00070","created_at":"2026-05-17T23:49:17Z"},{"alias_kind":"arxiv_version","alias_value":"1811.00070v2","created_at":"2026-05-17T23:49:17Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1811.00070","created_at":"2026-05-17T23:49:17Z"},{"alias_kind":"pith_short_12","alias_value":"J6BDD2PQ72L4","created_at":"2026-05-18T12:32:31Z"},{"alias_kind":"pith_short_16","alias_value":"J6BDD2PQ72L4QGHI","created_at":"2026-05-18T12:32:31Z"},{"alias_kind":"pith_short_8","alias_value":"J6BDD2PQ","created_at":"2026-05-18T12:32:31Z"}],"graph_snapshots":[{"event_id":"sha256:77e65ed8d428bf135c1327fb389dd5d334e29d8596099aaba7779980e682c328","target":"graph","created_at":"2026-05-17T23:49:17Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Crucial information about the practice of healthcare is recorded only in free-form text, which creates an enormous opportunity for high-impact NLP. However, annotated healthcare datasets tend to be small and expensive to obtain, which raises the question of how to make maximally efficient uses of the available data. To this end, we develop an LSTM-CRF model for combining unsupervised word representations and hand-built feature representations derived from publicly available healthcare ontologies. We show that this combined model yields superior performance on five datasets of diverse kinds of ","authors_text":"Bruno Godefroy, Christopher Potts, Guillaume Genthial, Yifeng Tao","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2018-10-31T19:06:50Z","title":"Effective Feature Representation for Clinical Text Concept Extraction"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1811.00070","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:113f919ae120a4f11c98a009f45b35fd8e6436db0394f1aa3c73b8d0f63d2826","target":"record","created_at":"2026-05-17T23:49:17Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"b9d714960eb96c51f6a95cd9c993e5904e8d01487be95772dcd36453f8d5101f","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2018-10-31T19:06:50Z","title_canon_sha256":"1a353cc438792989b6db46723b7dddac2fdffe075d338b099499a5a6a778b203"},"schema_version":"1.0","source":{"id":"1811.00070","kind":"arxiv","version":2}},"canonical_sha256":"4f8231e9f0fe97c818e8577cf2863678cac8b86e0a9000cfe343457e764872a1","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"4f8231e9f0fe97c818e8577cf2863678cac8b86e0a9000cfe343457e764872a1","first_computed_at":"2026-05-17T23:49:17.196566Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:49:17.196566Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"ZvHU/S1faMCZRJs+PtpjfIwNMQ2cJsvSdxhIgEWHQv+Gp77RQikJKntZJ/ZGCCFluVLftlhu58MHMVEZxZhMAA==","signature_status":"signed_v1","signed_at":"2026-05-17T23:49:17.197207Z","signed_message":"canonical_sha256_bytes"},"source_id":"1811.00070","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:113f919ae120a4f11c98a009f45b35fd8e6436db0394f1aa3c73b8d0f63d2826","sha256:77e65ed8d428bf135c1327fb389dd5d334e29d8596099aaba7779980e682c328"],"state_sha256":"93478e2c016e3c617d751d268102f25b9390c47b472799d1c024485053ae8b29"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"T4F4m4srf+vkEs3aA++yKaVckz+uOB46thgqmaItO58qV6zAiXWwcdzwn/Jx+JP/4K40eOdH/JpT0HmZ4qtbAg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-05T01:22:08.032741Z","bundle_sha256":"d03b41da5b3d20274bb49310aabda61884056dbddf0d4ae27e3e945bae8d1bc9"}}