{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2019:UQ2C772I2774RSHTIV6WQZSUMT","short_pith_number":"pith:UQ2C772I","canonical_record":{"source":{"id":"1905.10688","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-05-25T22:36:05Z","cross_cats_sorted":["cs.DB","cs.IR","stat.ML"],"title_canon_sha256":"09b5950f224ca27861847aff8f5fa05c75349a5fbd761ffdeef80f104742492f","abstract_canon_sha256":"a8420f7a5bd57f0ceb3ec1fa5370be82f92b707e58eebd89dc4c28dce2953542"},"schema_version":"1.0"},"canonical_sha256":"a4342fff48d7ffc8c8f3457d68665464f77cd954e71cbe3f4416b160193cbc0d","source":{"kind":"arxiv","id":"1905.10688","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1905.10688","created_at":"2026-05-17T23:45:05Z"},{"alias_kind":"arxiv_version","alias_value":"1905.10688v1","created_at":"2026-05-17T23:45:05Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1905.10688","created_at":"2026-05-17T23:45:05Z"},{"alias_kind":"pith_short_12","alias_value":"UQ2C772I2774","created_at":"2026-05-18T12:33:30Z"},{"alias_kind":"pith_short_16","alias_value":"UQ2C772I2774RSHT","created_at":"2026-05-18T12:33:30Z"},{"alias_kind":"pith_short_8","alias_value":"UQ2C772I","created_at":"2026-05-18T12:33:30Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2019:UQ2C772I2774RSHTIV6WQZSUMT","target":"record","payload":{"canonical_record":{"source":{"id":"1905.10688","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-05-25T22:36:05Z","cross_cats_sorted":["cs.DB","cs.IR","stat.ML"],"title_canon_sha256":"09b5950f224ca27861847aff8f5fa05c75349a5fbd761ffdeef80f104742492f","abstract_canon_sha256":"a8420f7a5bd57f0ceb3ec1fa5370be82f92b707e58eebd89dc4c28dce2953542"},"schema_version":"1.0"},"canonical_sha256":"a4342fff48d7ffc8c8f3457d68665464f77cd954e71cbe3f4416b160193cbc0d","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:45:05.549979Z","signature_b64":"FW+Drl/FTMl1yI87eFRwF2DabT1ozR3n7rKBLaPH8UK4yFSVrTwOnHMyyF0aR9HDp1+iMVFuiq+rnNudvV6fCg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"a4342fff48d7ffc8c8f3457d68665464f77cd954e71cbe3f4416b160193cbc0d","last_reissued_at":"2026-05-17T23:45:05.549250Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:45:05.549250Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1905.10688","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:45:05Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"Os4Vf3RIRBL9FDNmkYGQM6KQ7jZtMyfbHBCN68Kid9co+EMoQdk3MOkrH5e7OysI/pMSw4EtnNKYYU7QRwRpAA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-30T23:15:35.901418Z"},"content_sha256":"71df21e1bfdf16e58b7e774cdf900ffc3c0600ccc41c4480c4b0f598aaaaa349","schema_version":"1.0","event_id":"sha256:71df21e1bfdf16e58b7e774cdf900ffc3c0600ccc41c4480c4b0f598aaaaa349"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2019:UQ2C772I2774RSHTIV6WQZSUMT","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Sherlock: A Deep Learning Approach to Semantic Data Type Detection","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.DB","cs.IR","stat.ML"],"primary_cat":"cs.LG","authors_text":"Arvind Satyanarayan, \\c{C}a\\u{g}atay Demiralp, C\\'esar Hidalgo, Emanuel Zgraggen, Kevin Hu, Madelon Hulsebos, Michiel Bakker, Tim Kraska","submitted_at":"2019-05-25T22:36:05Z","abstract_excerpt":"Correctly detecting the semantic type of data columns is crucial for data science tasks such as automated data cleaning, schema matching, and data discovery. Existing data preparation and analysis systems rely on dictionary lookups and regular expression matching to detect semantic types. However, these matching-based approaches often are not robust to dirty data and only detect a limited number of types. We introduce Sherlock, a multi-input deep neural network for detecting semantic types. We train Sherlock on $686,765$ data columns retrieved from the VizNet corpus by matching $78$ semantic t"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1905.10688","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:45:05Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"4gUjVdfWcEuVfUqFIlU38bZk7wndXr0cmTp8DTYn+VMWCaG5vHbJWXS83T+nd5WUkhJ+/CEpdFRjbST6dwOGBw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-30T23:15:35.901796Z"},"content_sha256":"59aa921c68349a3afc7d0e2812a7f5f5ba748e935e3f8b97c56f61f67c6992b7","schema_version":"1.0","event_id":"sha256:59aa921c68349a3afc7d0e2812a7f5f5ba748e935e3f8b97c56f61f67c6992b7"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/UQ2C772I2774RSHTIV6WQZSUMT/bundle.json","state_url":"https://pith.science/pith/UQ2C772I2774RSHTIV6WQZSUMT/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/UQ2C772I2774RSHTIV6WQZSUMT/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-30T23:15:35Z","links":{"resolver":"https://pith.science/pith/UQ2C772I2774RSHTIV6WQZSUMT","bundle":"https://pith.science/pith/UQ2C772I2774RSHTIV6WQZSUMT/bundle.json","state":"https://pith.science/pith/UQ2C772I2774RSHTIV6WQZSUMT/state.json","well_known_bundle":"https://pith.science/.well-known/pith/UQ2C772I2774RSHTIV6WQZSUMT/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2019:UQ2C772I2774RSHTIV6WQZSUMT","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"a8420f7a5bd57f0ceb3ec1fa5370be82f92b707e58eebd89dc4c28dce2953542","cross_cats_sorted":["cs.DB","cs.IR","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-05-25T22:36:05Z","title_canon_sha256":"09b5950f224ca27861847aff8f5fa05c75349a5fbd761ffdeef80f104742492f"},"schema_version":"1.0","source":{"id":"1905.10688","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1905.10688","created_at":"2026-05-17T23:45:05Z"},{"alias_kind":"arxiv_version","alias_value":"1905.10688v1","created_at":"2026-05-17T23:45:05Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1905.10688","created_at":"2026-05-17T23:45:05Z"},{"alias_kind":"pith_short_12","alias_value":"UQ2C772I2774","created_at":"2026-05-18T12:33:30Z"},{"alias_kind":"pith_short_16","alias_value":"UQ2C772I2774RSHT","created_at":"2026-05-18T12:33:30Z"},{"alias_kind":"pith_short_8","alias_value":"UQ2C772I","created_at":"2026-05-18T12:33:30Z"}],"graph_snapshots":[{"event_id":"sha256:59aa921c68349a3afc7d0e2812a7f5f5ba748e935e3f8b97c56f61f67c6992b7","target":"graph","created_at":"2026-05-17T23:45:05Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Correctly detecting the semantic type of data columns is crucial for data science tasks such as automated data cleaning, schema matching, and data discovery. Existing data preparation and analysis systems rely on dictionary lookups and regular expression matching to detect semantic types. However, these matching-based approaches often are not robust to dirty data and only detect a limited number of types. We introduce Sherlock, a multi-input deep neural network for detecting semantic types. We train Sherlock on $686,765$ data columns retrieved from the VizNet corpus by matching $78$ semantic t","authors_text":"Arvind Satyanarayan, \\c{C}a\\u{g}atay Demiralp, C\\'esar Hidalgo, Emanuel Zgraggen, Kevin Hu, Madelon Hulsebos, Michiel Bakker, Tim Kraska","cross_cats":["cs.DB","cs.IR","stat.ML"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-05-25T22:36:05Z","title":"Sherlock: A Deep Learning Approach to Semantic Data Type Detection"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1905.10688","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:71df21e1bfdf16e58b7e774cdf900ffc3c0600ccc41c4480c4b0f598aaaaa349","target":"record","created_at":"2026-05-17T23:45:05Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"a8420f7a5bd57f0ceb3ec1fa5370be82f92b707e58eebd89dc4c28dce2953542","cross_cats_sorted":["cs.DB","cs.IR","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-05-25T22:36:05Z","title_canon_sha256":"09b5950f224ca27861847aff8f5fa05c75349a5fbd761ffdeef80f104742492f"},"schema_version":"1.0","source":{"id":"1905.10688","kind":"arxiv","version":1}},"canonical_sha256":"a4342fff48d7ffc8c8f3457d68665464f77cd954e71cbe3f4416b160193cbc0d","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"a4342fff48d7ffc8c8f3457d68665464f77cd954e71cbe3f4416b160193cbc0d","first_computed_at":"2026-05-17T23:45:05.549250Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:45:05.549250Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"FW+Drl/FTMl1yI87eFRwF2DabT1ozR3n7rKBLaPH8UK4yFSVrTwOnHMyyF0aR9HDp1+iMVFuiq+rnNudvV6fCg==","signature_status":"signed_v1","signed_at":"2026-05-17T23:45:05.549979Z","signed_message":"canonical_sha256_bytes"},"source_id":"1905.10688","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:71df21e1bfdf16e58b7e774cdf900ffc3c0600ccc41c4480c4b0f598aaaaa349","sha256:59aa921c68349a3afc7d0e2812a7f5f5ba748e935e3f8b97c56f61f67c6992b7"],"state_sha256":"44900ac643d9f1f9155fe8acb440d8dd05a0fd26e5d78cf0dd81c0baf3adb599"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"AXg7HKfcxIG4f0w8hW/wZCso2itc7ZPJ4pADnLxs5YJPAA25AoaiXO8ix7TASmUDEvaz8QhbbdBjxGJlIbUGBw==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-30T23:15:35.903964Z","bundle_sha256":"a113aa8a7d6674c9909c1cfea13b96234af4dacbc59ebb4b6e4a39afd33f5bff"}}