{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2016:HCCEPYDRIBMEME2HKBNRYJGCM3","short_pith_number":"pith:HCCEPYDR","canonical_record":{"source":{"id":"1607.02810","kind":"arxiv","version":4},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2016-07-11T02:46:48Z","cross_cats_sorted":[],"title_canon_sha256":"036cf320a2f2fd7093046124c11b03b5df7ff8d9b0b77f04a59817a7889c1a45","abstract_canon_sha256":"33eb39ab335f78b583d99f0601dfa5dc1fecedfcb7535e563d09f9e7cf537592"},"schema_version":"1.0"},"canonical_sha256":"388447e0714058461347505b1c24c266f30b91af65fbb39d93657049f2400825","source":{"kind":"arxiv","id":"1607.02810","version":4},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1607.02810","created_at":"2026-05-18T00:58:55Z"},{"alias_kind":"arxiv_version","alias_value":"1607.02810v4","created_at":"2026-05-18T00:58:55Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1607.02810","created_at":"2026-05-18T00:58:55Z"},{"alias_kind":"pith_short_12","alias_value":"HCCEPYDRIBME","created_at":"2026-05-18T12:30:19Z"},{"alias_kind":"pith_short_16","alias_value":"HCCEPYDRIBMEME2H","created_at":"2026-05-18T12:30:19Z"},{"alias_kind":"pith_short_8","alias_value":"HCCEPYDR","created_at":"2026-05-18T12:30:19Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2016:HCCEPYDRIBMEME2HKBNRYJGCM3","target":"record","payload":{"canonical_record":{"source":{"id":"1607.02810","kind":"arxiv","version":4},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2016-07-11T02:46:48Z","cross_cats_sorted":[],"title_canon_sha256":"036cf320a2f2fd7093046124c11b03b5df7ff8d9b0b77f04a59817a7889c1a45","abstract_canon_sha256":"33eb39ab335f78b583d99f0601dfa5dc1fecedfcb7535e563d09f9e7cf537592"},"schema_version":"1.0"},"canonical_sha256":"388447e0714058461347505b1c24c266f30b91af65fbb39d93657049f2400825","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:58:55.034446Z","signature_b64":"ivMGmL3IgihMa8VjZYHhSoT6+cVHHQAZXti+v7iT2HMfmEH/D7sNTJfdXZubLqxii6/4w2NT8sfOhg+3N7+FCg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"388447e0714058461347505b1c24c266f30b91af65fbb39d93657049f2400825","last_reissued_at":"2026-05-18T00:58:55.033784Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:58:55.033784Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1607.02810","source_version":4,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:58:55Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"rupVy1SspXeZStRbbhQ8gyCUPo+uNwob9gmdbXaqWdFZGt/zvoJ3hvFIENnIF+jtetRECzTjQQEvy97iJH0aCg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-30T08:45:35.554531Z"},"content_sha256":"c8e4bf67785f01c45465ea8153e46022147c479a3205fa95376ef937264db3c4","schema_version":"1.0","event_id":"sha256:c8e4bf67785f01c45465ea8153e46022147c479a3205fa95376ef937264db3c4"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2016:HCCEPYDRIBMEME2HKBNRYJGCM3","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"The Benefits of Word Embeddings Features for Active Learning in Clinical Information Extraction","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Anthony Nguyen, Guido Zuccon, Lance De Vine, Laurianne Sitbon, Mahnoosh Kholghi","submitted_at":"2016-07-11T02:46:48Z","abstract_excerpt":"This study investigates the use of unsupervised word embeddings and sequence features for sample representation in an active learning framework built to extract clinical concepts from clinical free text. The objective is to further reduce the manual annotation effort while achieving higher effectiveness compared to a set of baseline features. Unsupervised features are derived from skip-gram word embeddings and a sequence representation approach. The comparative performance of unsupervised features and baseline hand-crafted features in an active learning framework are investigated using a wide "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1607.02810","kind":"arxiv","version":4},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:58:55Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"EOzEHTlLMdjMHOK+M3ms/ed85B989xwypNCuFTRSiZSH7+qKGsEVWkNeh3cg1Chc8fcvwlwSpOkLOKiblLNRDg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-30T08:45:35.554881Z"},"content_sha256":"20eec0a8184176d7e6f7f5cfe358972e6e952f2ef79628d3d5f0e75ec5f3d08b","schema_version":"1.0","event_id":"sha256:20eec0a8184176d7e6f7f5cfe358972e6e952f2ef79628d3d5f0e75ec5f3d08b"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/HCCEPYDRIBMEME2HKBNRYJGCM3/bundle.json","state_url":"https://pith.science/pith/HCCEPYDRIBMEME2HKBNRYJGCM3/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/HCCEPYDRIBMEME2HKBNRYJGCM3/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-30T08:45:35Z","links":{"resolver":"https://pith.science/pith/HCCEPYDRIBMEME2HKBNRYJGCM3","bundle":"https://pith.science/pith/HCCEPYDRIBMEME2HKBNRYJGCM3/bundle.json","state":"https://pith.science/pith/HCCEPYDRIBMEME2HKBNRYJGCM3/state.json","well_known_bundle":"https://pith.science/.well-known/pith/HCCEPYDRIBMEME2HKBNRYJGCM3/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2016:HCCEPYDRIBMEME2HKBNRYJGCM3","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"33eb39ab335f78b583d99f0601dfa5dc1fecedfcb7535e563d09f9e7cf537592","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2016-07-11T02:46:48Z","title_canon_sha256":"036cf320a2f2fd7093046124c11b03b5df7ff8d9b0b77f04a59817a7889c1a45"},"schema_version":"1.0","source":{"id":"1607.02810","kind":"arxiv","version":4}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1607.02810","created_at":"2026-05-18T00:58:55Z"},{"alias_kind":"arxiv_version","alias_value":"1607.02810v4","created_at":"2026-05-18T00:58:55Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1607.02810","created_at":"2026-05-18T00:58:55Z"},{"alias_kind":"pith_short_12","alias_value":"HCCEPYDRIBME","created_at":"2026-05-18T12:30:19Z"},{"alias_kind":"pith_short_16","alias_value":"HCCEPYDRIBMEME2H","created_at":"2026-05-18T12:30:19Z"},{"alias_kind":"pith_short_8","alias_value":"HCCEPYDR","created_at":"2026-05-18T12:30:19Z"}],"graph_snapshots":[{"event_id":"sha256:20eec0a8184176d7e6f7f5cfe358972e6e952f2ef79628d3d5f0e75ec5f3d08b","target":"graph","created_at":"2026-05-18T00:58:55Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"This study investigates the use of unsupervised word embeddings and sequence features for sample representation in an active learning framework built to extract clinical concepts from clinical free text. The objective is to further reduce the manual annotation effort while achieving higher effectiveness compared to a set of baseline features. Unsupervised features are derived from skip-gram word embeddings and a sequence representation approach. The comparative performance of unsupervised features and baseline hand-crafted features in an active learning framework are investigated using a wide ","authors_text":"Anthony Nguyen, Guido Zuccon, Lance De Vine, Laurianne Sitbon, Mahnoosh Kholghi","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2016-07-11T02:46:48Z","title":"The Benefits of Word Embeddings Features for Active Learning in Clinical Information Extraction"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1607.02810","kind":"arxiv","version":4},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:c8e4bf67785f01c45465ea8153e46022147c479a3205fa95376ef937264db3c4","target":"record","created_at":"2026-05-18T00:58:55Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"33eb39ab335f78b583d99f0601dfa5dc1fecedfcb7535e563d09f9e7cf537592","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2016-07-11T02:46:48Z","title_canon_sha256":"036cf320a2f2fd7093046124c11b03b5df7ff8d9b0b77f04a59817a7889c1a45"},"schema_version":"1.0","source":{"id":"1607.02810","kind":"arxiv","version":4}},"canonical_sha256":"388447e0714058461347505b1c24c266f30b91af65fbb39d93657049f2400825","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"388447e0714058461347505b1c24c266f30b91af65fbb39d93657049f2400825","first_computed_at":"2026-05-18T00:58:55.033784Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:58:55.033784Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"ivMGmL3IgihMa8VjZYHhSoT6+cVHHQAZXti+v7iT2HMfmEH/D7sNTJfdXZubLqxii6/4w2NT8sfOhg+3N7+FCg==","signature_status":"signed_v1","signed_at":"2026-05-18T00:58:55.034446Z","signed_message":"canonical_sha256_bytes"},"source_id":"1607.02810","source_kind":"arxiv","source_version":4}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:c8e4bf67785f01c45465ea8153e46022147c479a3205fa95376ef937264db3c4","sha256:20eec0a8184176d7e6f7f5cfe358972e6e952f2ef79628d3d5f0e75ec5f3d08b"],"state_sha256":"ad7ebec0051e92a5ef5883314f8bc066c84eef34e61473b026425907ea0c35ee"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"81yBsTPkqRwA0mlIC3eNCuf8x/i+0bWavtZ8SqjzVgALzo9piIAuhtvbZrgdvVCo9X0CK4fpyTvYfmfdYhrpBg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-30T08:45:35.556802Z","bundle_sha256":"485ae7e5909016098409d0e0c28a752c709ca5ee580fc905fc4884ef5ec65332"}}