{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2016:OWEM6NY5QRDEKVB2RPIABDVBE7","short_pith_number":"pith:OWEM6NY5","canonical_record":{"source":{"id":"1603.00106","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2016-03-01T00:45:18Z","cross_cats_sorted":["cs.CL","stat.ML"],"title_canon_sha256":"2a40b853fe33f0f452de18a7faef21c57f5f5b383c10bf394b17d20a113de174","abstract_canon_sha256":"f5dd6cda112a2e42cfa7ffac0e9e06959295e11e27f670aa7cd7ae51615d4356"},"schema_version":"1.0"},"canonical_sha256":"7588cf371d844645543a8bd0008ea127f9409f4ce56246fc1d3b8f19c757c75e","source":{"kind":"arxiv","id":"1603.00106","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1603.00106","created_at":"2026-05-18T01:12:54Z"},{"alias_kind":"arxiv_version","alias_value":"1603.00106v2","created_at":"2026-05-18T01:12:54Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1603.00106","created_at":"2026-05-18T01:12:54Z"},{"alias_kind":"pith_short_12","alias_value":"OWEM6NY5QRDE","created_at":"2026-05-18T12:30:36Z"},{"alias_kind":"pith_short_16","alias_value":"OWEM6NY5QRDEKVB2","created_at":"2026-05-18T12:30:36Z"},{"alias_kind":"pith_short_8","alias_value":"OWEM6NY5","created_at":"2026-05-18T12:30:36Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2016:OWEM6NY5QRDEKVB2RPIABDVBE7","target":"record","payload":{"canonical_record":{"source":{"id":"1603.00106","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2016-03-01T00:45:18Z","cross_cats_sorted":["cs.CL","stat.ML"],"title_canon_sha256":"2a40b853fe33f0f452de18a7faef21c57f5f5b383c10bf394b17d20a113de174","abstract_canon_sha256":"f5dd6cda112a2e42cfa7ffac0e9e06959295e11e27f670aa7cd7ae51615d4356"},"schema_version":"1.0"},"canonical_sha256":"7588cf371d844645543a8bd0008ea127f9409f4ce56246fc1d3b8f19c757c75e","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T01:12:54.914662Z","signature_b64":"GahoMu8Dncj+mEdE8kBNyGH65Uu3Z9huD0z6mRuQaOUkE00B5pS+IzFJUCQkH7/eUEU2wKJDZfu6GGjqYLFjDw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"7588cf371d844645543a8bd0008ea127f9409f4ce56246fc1d3b8f19c757c75e","last_reissued_at":"2026-05-18T01:12:54.914326Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T01:12:54.914326Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1603.00106","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T01:12:54Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"DtMqBWvBRD31/DLDmC00DjHNZ/wW9IDhyMnhPdhmJS9IfPq6TXpCdDY0OoJPLCuiJ41PFcol8qSWW069tE+LBw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-21T13:44:58.239355Z"},"content_sha256":"1ca552a2b1feff49850c7f88b60eee9069f65a6c67db92ca21b5dca17ec9ce62","schema_version":"1.0","event_id":"sha256:1ca552a2b1feff49850c7f88b60eee9069f65a6c67db92ca21b5dca17ec9ce62"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2016:OWEM6NY5QRDEKVB2RPIABDVBE7","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Characterizing Diseases from Unstructured Text: A Vocabulary Driven Word2vec Approach","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.CL","stat.ML"],"primary_cat":"cs.LG","authors_text":"Emily Cohn, John S. Brownstein, Naren Ramakrishnan, Prithwish Chakraborty, Saurav Ghosh","submitted_at":"2016-03-01T00:45:18Z","abstract_excerpt":"Traditional disease surveillance can be augmented with a wide variety of real-time sources such as, news and social media. However, these sources are in general unstructured and, construction of surveillance tools such as taxonomical correlations and trace mapping involves considerable human supervision. In this paper, we motivate a disease vocabulary driven word2vec model (Dis2Vec) to model diseases and constituent attributes as word embeddings from the HealthMap news corpus. We use these word embeddings to automatically create disease taxonomies and evaluate our model against corresponding h"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1603.00106","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T01:12:54Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"XyRzj+wd36NXTuEsXhgglTyPj/1e8+jlb86oo4/lzkAz1iRtJKEJaqIGopFaf+mm/e99pbBUSzilq9+HCFjSDg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-21T13:44:58.239975Z"},"content_sha256":"1b29f790b2e3c9b1026c9a921f1acbd2df16df60fe5f283558ee4cfd486495fc","schema_version":"1.0","event_id":"sha256:1b29f790b2e3c9b1026c9a921f1acbd2df16df60fe5f283558ee4cfd486495fc"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/OWEM6NY5QRDEKVB2RPIABDVBE7/bundle.json","state_url":"https://pith.science/pith/OWEM6NY5QRDEKVB2RPIABDVBE7/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/OWEM6NY5QRDEKVB2RPIABDVBE7/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-21T13:44:58Z","links":{"resolver":"https://pith.science/pith/OWEM6NY5QRDEKVB2RPIABDVBE7","bundle":"https://pith.science/pith/OWEM6NY5QRDEKVB2RPIABDVBE7/bundle.json","state":"https://pith.science/pith/OWEM6NY5QRDEKVB2RPIABDVBE7/state.json","well_known_bundle":"https://pith.science/.well-known/pith/OWEM6NY5QRDEKVB2RPIABDVBE7/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2016:OWEM6NY5QRDEKVB2RPIABDVBE7","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"f5dd6cda112a2e42cfa7ffac0e9e06959295e11e27f670aa7cd7ae51615d4356","cross_cats_sorted":["cs.CL","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2016-03-01T00:45:18Z","title_canon_sha256":"2a40b853fe33f0f452de18a7faef21c57f5f5b383c10bf394b17d20a113de174"},"schema_version":"1.0","source":{"id":"1603.00106","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1603.00106","created_at":"2026-05-18T01:12:54Z"},{"alias_kind":"arxiv_version","alias_value":"1603.00106v2","created_at":"2026-05-18T01:12:54Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1603.00106","created_at":"2026-05-18T01:12:54Z"},{"alias_kind":"pith_short_12","alias_value":"OWEM6NY5QRDE","created_at":"2026-05-18T12:30:36Z"},{"alias_kind":"pith_short_16","alias_value":"OWEM6NY5QRDEKVB2","created_at":"2026-05-18T12:30:36Z"},{"alias_kind":"pith_short_8","alias_value":"OWEM6NY5","created_at":"2026-05-18T12:30:36Z"}],"graph_snapshots":[{"event_id":"sha256:1b29f790b2e3c9b1026c9a921f1acbd2df16df60fe5f283558ee4cfd486495fc","target":"graph","created_at":"2026-05-18T01:12:54Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Traditional disease surveillance can be augmented with a wide variety of real-time sources such as, news and social media. However, these sources are in general unstructured and, construction of surveillance tools such as taxonomical correlations and trace mapping involves considerable human supervision. In this paper, we motivate a disease vocabulary driven word2vec model (Dis2Vec) to model diseases and constituent attributes as word embeddings from the HealthMap news corpus. We use these word embeddings to automatically create disease taxonomies and evaluate our model against corresponding h","authors_text":"Emily Cohn, John S. Brownstein, Naren Ramakrishnan, Prithwish Chakraborty, Saurav Ghosh","cross_cats":["cs.CL","stat.ML"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2016-03-01T00:45:18Z","title":"Characterizing Diseases from Unstructured Text: A Vocabulary Driven Word2vec Approach"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1603.00106","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:1ca552a2b1feff49850c7f88b60eee9069f65a6c67db92ca21b5dca17ec9ce62","target":"record","created_at":"2026-05-18T01:12:54Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"f5dd6cda112a2e42cfa7ffac0e9e06959295e11e27f670aa7cd7ae51615d4356","cross_cats_sorted":["cs.CL","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2016-03-01T00:45:18Z","title_canon_sha256":"2a40b853fe33f0f452de18a7faef21c57f5f5b383c10bf394b17d20a113de174"},"schema_version":"1.0","source":{"id":"1603.00106","kind":"arxiv","version":2}},"canonical_sha256":"7588cf371d844645543a8bd0008ea127f9409f4ce56246fc1d3b8f19c757c75e","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"7588cf371d844645543a8bd0008ea127f9409f4ce56246fc1d3b8f19c757c75e","first_computed_at":"2026-05-18T01:12:54.914326Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T01:12:54.914326Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"GahoMu8Dncj+mEdE8kBNyGH65Uu3Z9huD0z6mRuQaOUkE00B5pS+IzFJUCQkH7/eUEU2wKJDZfu6GGjqYLFjDw==","signature_status":"signed_v1","signed_at":"2026-05-18T01:12:54.914662Z","signed_message":"canonical_sha256_bytes"},"source_id":"1603.00106","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:1ca552a2b1feff49850c7f88b60eee9069f65a6c67db92ca21b5dca17ec9ce62","sha256:1b29f790b2e3c9b1026c9a921f1acbd2df16df60fe5f283558ee4cfd486495fc"],"state_sha256":"8a9c49cf4843c153c6abc7cffb524097f07bfb09ef95b6482036839efc2557cd"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"U19y/ckDbB4ptpqw7H3UNJanoI8tdn+QKHRw4I2q1DJn64hSnHc5Y2/Dfeagatl+kS93nGidFqPjgykI8rf7DQ==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-21T13:44:58.243136Z","bundle_sha256":"8170eaaeffea77df0c0ed15940c8c2d13fa182611582d684a4cbc5dfcefb2afd"}}