{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2018:KBQIHNYRP4JYROBMUHF4CIJD2K","short_pith_number":"pith:KBQIHNYR","canonical_record":{"source":{"id":"1803.00446","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-03-01T15:33:06Z","cross_cats_sorted":["stat.ML"],"title_canon_sha256":"fb2991b3e8382a703bf8348876f1073a66767c6b5e2df822c01352244ff26d27","abstract_canon_sha256":"5221349eb6a8563b0754b8a4867eb2449167cc13034941a4280ed216e187df2c"},"schema_version":"1.0"},"canonical_sha256":"506083b7117f1388b82ca1cbc12123d28ac1549d4b526e2458096909254641c6","source":{"kind":"arxiv","id":"1803.00446","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1803.00446","created_at":"2026-05-18T00:22:11Z"},{"alias_kind":"arxiv_version","alias_value":"1803.00446v1","created_at":"2026-05-18T00:22:11Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1803.00446","created_at":"2026-05-18T00:22:11Z"},{"alias_kind":"pith_short_12","alias_value":"KBQIHNYRP4JY","created_at":"2026-05-18T12:32:33Z"},{"alias_kind":"pith_short_16","alias_value":"KBQIHNYRP4JYROBM","created_at":"2026-05-18T12:32:33Z"},{"alias_kind":"pith_short_8","alias_value":"KBQIHNYR","created_at":"2026-05-18T12:32:33Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2018:KBQIHNYRP4JYROBMUHF4CIJD2K","target":"record","payload":{"canonical_record":{"source":{"id":"1803.00446","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-03-01T15:33:06Z","cross_cats_sorted":["stat.ML"],"title_canon_sha256":"fb2991b3e8382a703bf8348876f1073a66767c6b5e2df822c01352244ff26d27","abstract_canon_sha256":"5221349eb6a8563b0754b8a4867eb2449167cc13034941a4280ed216e187df2c"},"schema_version":"1.0"},"canonical_sha256":"506083b7117f1388b82ca1cbc12123d28ac1549d4b526e2458096909254641c6","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:22:11.733731Z","signature_b64":"dddhDzLJ39fGu+i0RSp7w9B9QTIFkYL1A4aV/s5EPRfumKC4re1CtH9Ztm7oMMEbaRVnxxhBI2vM7v7dnMpaDg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"506083b7117f1388b82ca1cbc12123d28ac1549d4b526e2458096909254641c6","last_reissued_at":"2026-05-18T00:22:11.733118Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:22:11.733118Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1803.00446","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:22:11Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"zZF+H6RYqrua5t8+/+dQ+8X71YCtEdVkWWMDEHDsY18qE46W8g0S5DaUf3pCetP97mwmbzuCJRtwd/fCEiHbDA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-28T16:32:09.752886Z"},"content_sha256":"55531413ae2a5e15310f0a687deccd06da714348c075284298754d86e25136e3","schema_version":"1.0","event_id":"sha256:55531413ae2a5e15310f0a687deccd06da714348c075284298754d86e25136e3"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2018:KBQIHNYRP4JYROBMUHF4CIJD2K","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Inferring Missing Categorical Information in Noisy and Sparse Web Markup","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["stat.ML"],"primary_cat":"cs.LG","authors_text":"Elena Demidova, Nicolas Tempelmeier, Stefan Dietze","submitted_at":"2018-03-01T15:33:06Z","abstract_excerpt":"Embedded markup of Web pages has seen widespread adoption throughout the past years driven by standards such as RDFa and Microdata and initiatives such as schema.org, where recent studies show an adoption by 39% of all Web pages already in 2016. While this constitutes an important information source for tasks such as Web search, Web page classification or knowledge graph augmentation, individual markup nodes are usually sparsely described and often lack essential information. For instance, from 26 million nodes describing events within the Common Crawl in 2016, 59% of nodes provide less than s"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1803.00446","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:22:11Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"FVRefZ8pw4I6GPbbQa4L+7QqhCHxY+wodNZ5AVS8QCgXZAL1Hg0MR3qZUqSTXANe0+flOWCGZ+sUFqR8vuOIDg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-28T16:32:09.753535Z"},"content_sha256":"75afe0d8c8504f79a0c3b8af8e4a61f916d67bfba324507ab4be39216672af61","schema_version":"1.0","event_id":"sha256:75afe0d8c8504f79a0c3b8af8e4a61f916d67bfba324507ab4be39216672af61"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/KBQIHNYRP4JYROBMUHF4CIJD2K/bundle.json","state_url":"https://pith.science/pith/KBQIHNYRP4JYROBMUHF4CIJD2K/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/KBQIHNYRP4JYROBMUHF4CIJD2K/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-28T16:32:09Z","links":{"resolver":"https://pith.science/pith/KBQIHNYRP4JYROBMUHF4CIJD2K","bundle":"https://pith.science/pith/KBQIHNYRP4JYROBMUHF4CIJD2K/bundle.json","state":"https://pith.science/pith/KBQIHNYRP4JYROBMUHF4CIJD2K/state.json","well_known_bundle":"https://pith.science/.well-known/pith/KBQIHNYRP4JYROBMUHF4CIJD2K/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2018:KBQIHNYRP4JYROBMUHF4CIJD2K","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"5221349eb6a8563b0754b8a4867eb2449167cc13034941a4280ed216e187df2c","cross_cats_sorted":["stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-03-01T15:33:06Z","title_canon_sha256":"fb2991b3e8382a703bf8348876f1073a66767c6b5e2df822c01352244ff26d27"},"schema_version":"1.0","source":{"id":"1803.00446","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1803.00446","created_at":"2026-05-18T00:22:11Z"},{"alias_kind":"arxiv_version","alias_value":"1803.00446v1","created_at":"2026-05-18T00:22:11Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1803.00446","created_at":"2026-05-18T00:22:11Z"},{"alias_kind":"pith_short_12","alias_value":"KBQIHNYRP4JY","created_at":"2026-05-18T12:32:33Z"},{"alias_kind":"pith_short_16","alias_value":"KBQIHNYRP4JYROBM","created_at":"2026-05-18T12:32:33Z"},{"alias_kind":"pith_short_8","alias_value":"KBQIHNYR","created_at":"2026-05-18T12:32:33Z"}],"graph_snapshots":[{"event_id":"sha256:75afe0d8c8504f79a0c3b8af8e4a61f916d67bfba324507ab4be39216672af61","target":"graph","created_at":"2026-05-18T00:22:11Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Embedded markup of Web pages has seen widespread adoption throughout the past years driven by standards such as RDFa and Microdata and initiatives such as schema.org, where recent studies show an adoption by 39% of all Web pages already in 2016. While this constitutes an important information source for tasks such as Web search, Web page classification or knowledge graph augmentation, individual markup nodes are usually sparsely described and often lack essential information. For instance, from 26 million nodes describing events within the Common Crawl in 2016, 59% of nodes provide less than s","authors_text":"Elena Demidova, Nicolas Tempelmeier, Stefan Dietze","cross_cats":["stat.ML"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-03-01T15:33:06Z","title":"Inferring Missing Categorical Information in Noisy and Sparse Web Markup"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1803.00446","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:55531413ae2a5e15310f0a687deccd06da714348c075284298754d86e25136e3","target":"record","created_at":"2026-05-18T00:22:11Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"5221349eb6a8563b0754b8a4867eb2449167cc13034941a4280ed216e187df2c","cross_cats_sorted":["stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-03-01T15:33:06Z","title_canon_sha256":"fb2991b3e8382a703bf8348876f1073a66767c6b5e2df822c01352244ff26d27"},"schema_version":"1.0","source":{"id":"1803.00446","kind":"arxiv","version":1}},"canonical_sha256":"506083b7117f1388b82ca1cbc12123d28ac1549d4b526e2458096909254641c6","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"506083b7117f1388b82ca1cbc12123d28ac1549d4b526e2458096909254641c6","first_computed_at":"2026-05-18T00:22:11.733118Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:22:11.733118Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"dddhDzLJ39fGu+i0RSp7w9B9QTIFkYL1A4aV/s5EPRfumKC4re1CtH9Ztm7oMMEbaRVnxxhBI2vM7v7dnMpaDg==","signature_status":"signed_v1","signed_at":"2026-05-18T00:22:11.733731Z","signed_message":"canonical_sha256_bytes"},"source_id":"1803.00446","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:55531413ae2a5e15310f0a687deccd06da714348c075284298754d86e25136e3","sha256:75afe0d8c8504f79a0c3b8af8e4a61f916d67bfba324507ab4be39216672af61"],"state_sha256":"f77ef161f7ca177a22a73a02ddac144c2579d3934b45a4910b895db0e3effd25"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"N2XDq9Mj1RStOkuPiZU1xOj0+1JrN9bQy5xOSdqfqzL/w799TokHN/nGkeud82vGakcrU8Soku8tjhe+NDbcBg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-28T16:32:09.756687Z","bundle_sha256":"aeb4ead94fdd27f12a900fb9b25e24c7cea4d22d51c9afee75edc519dcd31fd3"}}