{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2015:THWAP7SW53GMTFHGLJ6OQSO5C3","short_pith_number":"pith:THWAP7SW","canonical_record":{"source":{"id":"1503.06598","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.IR","submitted_at":"2015-03-23T11:05:26Z","cross_cats_sorted":[],"title_canon_sha256":"e8458ec9b3a5786e6f240fc851bd3ec62a9a20a4e02b220b9f262f8591796768","abstract_canon_sha256":"1d7947ff3a0f73d23ca80535880f146c047896876aa01ef567296b557444ac94"},"schema_version":"1.0"},"canonical_sha256":"99ec07fe56eeccc994e65a7ce849dd16e2a82efb709c893813e403bbedb88cfd","source":{"kind":"arxiv","id":"1503.06598","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1503.06598","created_at":"2026-05-18T01:09:48Z"},{"alias_kind":"arxiv_version","alias_value":"1503.06598v1","created_at":"2026-05-18T01:09:48Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1503.06598","created_at":"2026-05-18T01:09:48Z"},{"alias_kind":"pith_short_12","alias_value":"THWAP7SW53GM","created_at":"2026-05-18T12:29:42Z"},{"alias_kind":"pith_short_16","alias_value":"THWAP7SW53GMTFHG","created_at":"2026-05-18T12:29:42Z"},{"alias_kind":"pith_short_8","alias_value":"THWAP7SW","created_at":"2026-05-18T12:29:42Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2015:THWAP7SW53GMTFHGLJ6OQSO5C3","target":"record","payload":{"canonical_record":{"source":{"id":"1503.06598","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.IR","submitted_at":"2015-03-23T11:05:26Z","cross_cats_sorted":[],"title_canon_sha256":"e8458ec9b3a5786e6f240fc851bd3ec62a9a20a4e02b220b9f262f8591796768","abstract_canon_sha256":"1d7947ff3a0f73d23ca80535880f146c047896876aa01ef567296b557444ac94"},"schema_version":"1.0"},"canonical_sha256":"99ec07fe56eeccc994e65a7ce849dd16e2a82efb709c893813e403bbedb88cfd","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T01:09:48.950622Z","signature_b64":"/FRxQJGIXXoQfuc9WJ/uVHFJQ0wFN5LGdJvIrH8q3c6ko7Y2MEpZoToRwcFbyZy1ml45cISpWUlNO51m7Vl2CQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"99ec07fe56eeccc994e65a7ce849dd16e2a82efb709c893813e403bbedb88cfd","last_reissued_at":"2026-05-18T01:09:48.949939Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T01:09:48.949939Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1503.06598","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T01:09:48Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"EA9Q5+mC3EsQ0vsU9y8ZKnWCi4NW6iXSDNlCb6ScAFmQZJdqk5V3gpDi+MvHHKSr8pKJbrff+FLy5NfoGb3SAA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-26T17:01:36.640217Z"},"content_sha256":"60971678fc160d6816df1c56328f7c1d587980ab706c1adcc78d6d68e727fc58","schema_version":"1.0","event_id":"sha256:60971678fc160d6816df1c56328f7c1d587980ab706c1adcc78d6d68e727fc58"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2015:THWAP7SW53GMTFHGLJ6OQSO5C3","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Identifying Web Tables - Supporting a Neglected Type of Content on the Web","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.IR","authors_text":"Dmitry Mouromtsev, Mikhail Galkin, S\\\"oren Auer","submitted_at":"2015-03-23T11:05:26Z","abstract_excerpt":"The abundance of the data in the Internet facilitates the improvement of extraction and processing tools. The trend in the open data publishing encourages the adoption of structured formats like CSV and RDF. However, there is still a plethora of unstructured data on the Web which we assume contain semantics. For this reason, we propose an approach to derive semantics from web tables which are still the most popular publishing tool on the Web. The paper also discusses methods and services of unstructured data extraction and processing as well as machine learning techniques to enhance such a wor"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1503.06598","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T01:09:48Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"NaDYi3IkauF4iibs6Ubpc3pSH9oaLBUPVZxyjlT8PzwHogJGf1pb5ytHsCTq2j/gXz88tQd6obYkHCof0sxdBg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-26T17:01:36.640706Z"},"content_sha256":"8fe8e9220dfbf06ac3528fb9b0a4208c2a1b21a2e20bfae3fb68ed9b0955a852","schema_version":"1.0","event_id":"sha256:8fe8e9220dfbf06ac3528fb9b0a4208c2a1b21a2e20bfae3fb68ed9b0955a852"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/THWAP7SW53GMTFHGLJ6OQSO5C3/bundle.json","state_url":"https://pith.science/pith/THWAP7SW53GMTFHGLJ6OQSO5C3/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/THWAP7SW53GMTFHGLJ6OQSO5C3/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-26T17:01:36Z","links":{"resolver":"https://pith.science/pith/THWAP7SW53GMTFHGLJ6OQSO5C3","bundle":"https://pith.science/pith/THWAP7SW53GMTFHGLJ6OQSO5C3/bundle.json","state":"https://pith.science/pith/THWAP7SW53GMTFHGLJ6OQSO5C3/state.json","well_known_bundle":"https://pith.science/.well-known/pith/THWAP7SW53GMTFHGLJ6OQSO5C3/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2015:THWAP7SW53GMTFHGLJ6OQSO5C3","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"1d7947ff3a0f73d23ca80535880f146c047896876aa01ef567296b557444ac94","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.IR","submitted_at":"2015-03-23T11:05:26Z","title_canon_sha256":"e8458ec9b3a5786e6f240fc851bd3ec62a9a20a4e02b220b9f262f8591796768"},"schema_version":"1.0","source":{"id":"1503.06598","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1503.06598","created_at":"2026-05-18T01:09:48Z"},{"alias_kind":"arxiv_version","alias_value":"1503.06598v1","created_at":"2026-05-18T01:09:48Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1503.06598","created_at":"2026-05-18T01:09:48Z"},{"alias_kind":"pith_short_12","alias_value":"THWAP7SW53GM","created_at":"2026-05-18T12:29:42Z"},{"alias_kind":"pith_short_16","alias_value":"THWAP7SW53GMTFHG","created_at":"2026-05-18T12:29:42Z"},{"alias_kind":"pith_short_8","alias_value":"THWAP7SW","created_at":"2026-05-18T12:29:42Z"}],"graph_snapshots":[{"event_id":"sha256:8fe8e9220dfbf06ac3528fb9b0a4208c2a1b21a2e20bfae3fb68ed9b0955a852","target":"graph","created_at":"2026-05-18T01:09:48Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"The abundance of the data in the Internet facilitates the improvement of extraction and processing tools. The trend in the open data publishing encourages the adoption of structured formats like CSV and RDF. However, there is still a plethora of unstructured data on the Web which we assume contain semantics. For this reason, we propose an approach to derive semantics from web tables which are still the most popular publishing tool on the Web. The paper also discusses methods and services of unstructured data extraction and processing as well as machine learning techniques to enhance such a wor","authors_text":"Dmitry Mouromtsev, Mikhail Galkin, S\\\"oren Auer","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.IR","submitted_at":"2015-03-23T11:05:26Z","title":"Identifying Web Tables - Supporting a Neglected Type of Content on the Web"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1503.06598","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:60971678fc160d6816df1c56328f7c1d587980ab706c1adcc78d6d68e727fc58","target":"record","created_at":"2026-05-18T01:09:48Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"1d7947ff3a0f73d23ca80535880f146c047896876aa01ef567296b557444ac94","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.IR","submitted_at":"2015-03-23T11:05:26Z","title_canon_sha256":"e8458ec9b3a5786e6f240fc851bd3ec62a9a20a4e02b220b9f262f8591796768"},"schema_version":"1.0","source":{"id":"1503.06598","kind":"arxiv","version":1}},"canonical_sha256":"99ec07fe56eeccc994e65a7ce849dd16e2a82efb709c893813e403bbedb88cfd","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"99ec07fe56eeccc994e65a7ce849dd16e2a82efb709c893813e403bbedb88cfd","first_computed_at":"2026-05-18T01:09:48.949939Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T01:09:48.949939Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"/FRxQJGIXXoQfuc9WJ/uVHFJQ0wFN5LGdJvIrH8q3c6ko7Y2MEpZoToRwcFbyZy1ml45cISpWUlNO51m7Vl2CQ==","signature_status":"signed_v1","signed_at":"2026-05-18T01:09:48.950622Z","signed_message":"canonical_sha256_bytes"},"source_id":"1503.06598","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:60971678fc160d6816df1c56328f7c1d587980ab706c1adcc78d6d68e727fc58","sha256:8fe8e9220dfbf06ac3528fb9b0a4208c2a1b21a2e20bfae3fb68ed9b0955a852"],"state_sha256":"f0e6b3540e1da44fb67f372dcb4dd999df900d206b7868e8d608cfa0a2391b38"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"QKLPMFZH4hBV2c2/1ErpmGNI3RaPsb94PqKBTb2NRIXmev5JiGRjDVet9TKi2k1xjnC53KPg8yjJobvelGXQCQ==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-26T17:01:36.643372Z","bundle_sha256":"1c9f5be1814e7e5d99574cb0c48fdd6dff7575b975e64ca3af20faab28724e0c"}}