{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2017:GE7PGCDUTUJFMQ3ANADLGMNP5N","short_pith_number":"pith:GE7PGCDU","canonical_record":{"source":{"id":"1710.11528","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DB","submitted_at":"2017-10-31T15:21:32Z","cross_cats_sorted":[],"title_canon_sha256":"e88a67b643c192c0bbba7daa4dc9b14295452ff11853072a9a908936252313bc","abstract_canon_sha256":"e1ceb17b2d706939f7d6280ce153634d78edbe8f6e9b1c1deb5046dcf7f144ff"},"schema_version":"1.0"},"canonical_sha256":"313ef308749d125643606806b331afeb6ce18d6f6982bd3ef54188d282ebaf22","source":{"kind":"arxiv","id":"1710.11528","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1710.11528","created_at":"2026-05-18T00:28:40Z"},{"alias_kind":"arxiv_version","alias_value":"1710.11528v2","created_at":"2026-05-18T00:28:40Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1710.11528","created_at":"2026-05-18T00:28:40Z"},{"alias_kind":"pith_short_12","alias_value":"GE7PGCDUTUJF","created_at":"2026-05-18T12:31:15Z"},{"alias_kind":"pith_short_16","alias_value":"GE7PGCDUTUJFMQ3A","created_at":"2026-05-18T12:31:15Z"},{"alias_kind":"pith_short_8","alias_value":"GE7PGCDU","created_at":"2026-05-18T12:31:15Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2017:GE7PGCDUTUJFMQ3ANADLGMNP5N","target":"record","payload":{"canonical_record":{"source":{"id":"1710.11528","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DB","submitted_at":"2017-10-31T15:21:32Z","cross_cats_sorted":[],"title_canon_sha256":"e88a67b643c192c0bbba7daa4dc9b14295452ff11853072a9a908936252313bc","abstract_canon_sha256":"e1ceb17b2d706939f7d6280ce153634d78edbe8f6e9b1c1deb5046dcf7f144ff"},"schema_version":"1.0"},"canonical_sha256":"313ef308749d125643606806b331afeb6ce18d6f6982bd3ef54188d282ebaf22","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:28:40.200563Z","signature_b64":"uqXQ6bzLP/IQjQdLJQk4R4BbH2gPtTObOvAh/Z7Qc9PxGgYD7Y3RuQxlyduULgtHCdHzXsoJiIJeJ5TirX3kDQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"313ef308749d125643606806b331afeb6ce18d6f6982bd3ef54188d282ebaf22","last_reissued_at":"2026-05-18T00:28:40.199917Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:28:40.199917Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1710.11528","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:28:40Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"Hpt5uZwqaHPjmMjhg2RsiZk2CHWpM+bI1cTltvZ1eTxfqmmSQnEIUrmz4/NwD0qt388N+V5kzltiEj9WpQKACA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-31T09:31:41.318652Z"},"content_sha256":"1ff75e1d65868bffac65af91e71139a7bb9af8e67620a96a20645c660a4c4765","schema_version":"1.0","event_id":"sha256:1ff75e1d65868bffac65af91e71139a7bb9af8e67620a96a20645c660a4c4765"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2017:GE7PGCDUTUJFMQ3ANADLGMNP5N","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Extracting Syntactic Patterns from Databases","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.DB","authors_text":"Andrew Ilyas, Joana M. F. da Trindade, Raul Castro Fernandez, Samuel Madden","submitted_at":"2017-10-31T15:21:32Z","abstract_excerpt":"Many database columns contain string or numerical data that conforms to a pattern, such as phone numbers, dates, addresses, product identifiers, and employee ids. These patterns are useful in a number of data processing applications, including understanding what a specific field represents when field names are ambiguous, identifying outlier values, and finding similar fields across data sets. One way to express such patterns would be to learn regular expressions for each field in the database. Unfortunately, exist- ing techniques on regular expression learning are slow, taking hundreds of seco"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1710.11528","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:28:40Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"4cZG//gh2v44CS+5K1nEIHMXtuQL/pVjupHBaAAl/kMGWSoP/AKWKXbn3uS4/6AkHNp4e4wsWT0ruO/WztkFAA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-31T09:31:41.319003Z"},"content_sha256":"edb6a502d1af698b99cc34c9d290f015edbff88fb1c576779140ca29246b76eb","schema_version":"1.0","event_id":"sha256:edb6a502d1af698b99cc34c9d290f015edbff88fb1c576779140ca29246b76eb"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/GE7PGCDUTUJFMQ3ANADLGMNP5N/bundle.json","state_url":"https://pith.science/pith/GE7PGCDUTUJFMQ3ANADLGMNP5N/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/GE7PGCDUTUJFMQ3ANADLGMNP5N/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-31T09:31:41Z","links":{"resolver":"https://pith.science/pith/GE7PGCDUTUJFMQ3ANADLGMNP5N","bundle":"https://pith.science/pith/GE7PGCDUTUJFMQ3ANADLGMNP5N/bundle.json","state":"https://pith.science/pith/GE7PGCDUTUJFMQ3ANADLGMNP5N/state.json","well_known_bundle":"https://pith.science/.well-known/pith/GE7PGCDUTUJFMQ3ANADLGMNP5N/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2017:GE7PGCDUTUJFMQ3ANADLGMNP5N","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"e1ceb17b2d706939f7d6280ce153634d78edbe8f6e9b1c1deb5046dcf7f144ff","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DB","submitted_at":"2017-10-31T15:21:32Z","title_canon_sha256":"e88a67b643c192c0bbba7daa4dc9b14295452ff11853072a9a908936252313bc"},"schema_version":"1.0","source":{"id":"1710.11528","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1710.11528","created_at":"2026-05-18T00:28:40Z"},{"alias_kind":"arxiv_version","alias_value":"1710.11528v2","created_at":"2026-05-18T00:28:40Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1710.11528","created_at":"2026-05-18T00:28:40Z"},{"alias_kind":"pith_short_12","alias_value":"GE7PGCDUTUJF","created_at":"2026-05-18T12:31:15Z"},{"alias_kind":"pith_short_16","alias_value":"GE7PGCDUTUJFMQ3A","created_at":"2026-05-18T12:31:15Z"},{"alias_kind":"pith_short_8","alias_value":"GE7PGCDU","created_at":"2026-05-18T12:31:15Z"}],"graph_snapshots":[{"event_id":"sha256:edb6a502d1af698b99cc34c9d290f015edbff88fb1c576779140ca29246b76eb","target":"graph","created_at":"2026-05-18T00:28:40Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Many database columns contain string or numerical data that conforms to a pattern, such as phone numbers, dates, addresses, product identifiers, and employee ids. These patterns are useful in a number of data processing applications, including understanding what a specific field represents when field names are ambiguous, identifying outlier values, and finding similar fields across data sets. One way to express such patterns would be to learn regular expressions for each field in the database. Unfortunately, exist- ing techniques on regular expression learning are slow, taking hundreds of seco","authors_text":"Andrew Ilyas, Joana M. F. da Trindade, Raul Castro Fernandez, Samuel Madden","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DB","submitted_at":"2017-10-31T15:21:32Z","title":"Extracting Syntactic Patterns from Databases"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1710.11528","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:1ff75e1d65868bffac65af91e71139a7bb9af8e67620a96a20645c660a4c4765","target":"record","created_at":"2026-05-18T00:28:40Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"e1ceb17b2d706939f7d6280ce153634d78edbe8f6e9b1c1deb5046dcf7f144ff","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DB","submitted_at":"2017-10-31T15:21:32Z","title_canon_sha256":"e88a67b643c192c0bbba7daa4dc9b14295452ff11853072a9a908936252313bc"},"schema_version":"1.0","source":{"id":"1710.11528","kind":"arxiv","version":2}},"canonical_sha256":"313ef308749d125643606806b331afeb6ce18d6f6982bd3ef54188d282ebaf22","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"313ef308749d125643606806b331afeb6ce18d6f6982bd3ef54188d282ebaf22","first_computed_at":"2026-05-18T00:28:40.199917Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:28:40.199917Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"uqXQ6bzLP/IQjQdLJQk4R4BbH2gPtTObOvAh/Z7Qc9PxGgYD7Y3RuQxlyduULgtHCdHzXsoJiIJeJ5TirX3kDQ==","signature_status":"signed_v1","signed_at":"2026-05-18T00:28:40.200563Z","signed_message":"canonical_sha256_bytes"},"source_id":"1710.11528","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:1ff75e1d65868bffac65af91e71139a7bb9af8e67620a96a20645c660a4c4765","sha256:edb6a502d1af698b99cc34c9d290f015edbff88fb1c576779140ca29246b76eb"],"state_sha256":"22b01d01edbd2e1e52891f6f300031c980b635385ce0419f356dcccc52d17e6c"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"Q5uenSJYHOeANPPaj8WbcD78XI6LVH+tI37fzQ4irxL0M3P9HT6pzKHSvW62AVXJMo3DbHxQe0+TYUylm5giCQ==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-31T09:31:41.321173Z","bundle_sha256":"54f20884c003f68ae77de376662db55d4b5221e6a5cc54d70a8df537fe3bc547"}}