{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2015:WULJI2YCTSWPHXLBBOAYYJ2KI2","short_pith_number":"pith:WULJI2YC","canonical_record":{"source":{"id":"1505.00841","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DB","submitted_at":"2015-05-04T23:21:00Z","cross_cats_sorted":["cs.IR"],"title_canon_sha256":"74c76b5e919a2eb26edda2358ce487d43cc80dcbfd87c66bd6bd3a0cf432d197","abstract_canon_sha256":"7171b78e2d0de6d10a46976b92759293220caa41b7e832725035dbd133a469fa"},"schema_version":"1.0"},"canonical_sha256":"b516946b029cacf3dd610b818c274a46bee484ba54ff835ac51371f2f31c03ec","source":{"kind":"arxiv","id":"1505.00841","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1505.00841","created_at":"2026-05-18T01:11:00Z"},{"alias_kind":"arxiv_version","alias_value":"1505.00841v1","created_at":"2026-05-18T01:11:00Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1505.00841","created_at":"2026-05-18T01:11:00Z"},{"alias_kind":"pith_short_12","alias_value":"WULJI2YCTSWP","created_at":"2026-05-18T12:29:47Z"},{"alias_kind":"pith_short_16","alias_value":"WULJI2YCTSWPHXLB","created_at":"2026-05-18T12:29:47Z"},{"alias_kind":"pith_short_8","alias_value":"WULJI2YC","created_at":"2026-05-18T12:29:47Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2015:WULJI2YCTSWPHXLBBOAYYJ2KI2","target":"record","payload":{"canonical_record":{"source":{"id":"1505.00841","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DB","submitted_at":"2015-05-04T23:21:00Z","cross_cats_sorted":["cs.IR"],"title_canon_sha256":"74c76b5e919a2eb26edda2358ce487d43cc80dcbfd87c66bd6bd3a0cf432d197","abstract_canon_sha256":"7171b78e2d0de6d10a46976b92759293220caa41b7e832725035dbd133a469fa"},"schema_version":"1.0"},"canonical_sha256":"b516946b029cacf3dd610b818c274a46bee484ba54ff835ac51371f2f31c03ec","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T01:11:00.152635Z","signature_b64":"N55AzlndErBDlCgGetC4tu9go39WKjmaDHbR3DY8+Vq6dpksn/kXj5CU9i+VZRuU3r4pme5PKoH7cQn7eWwLAA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"b516946b029cacf3dd610b818c274a46bee484ba54ff835ac51371f2f31c03ec","last_reissued_at":"2026-05-18T01:11:00.151985Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T01:11:00.151985Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1505.00841","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T01:11:00Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"VO6ZDONyclwNgwrETuwZlxNak9zaIKsLYOY0a8agXaJRRZsmnT99kFtWf0LsmR8GLAjTawQ99t/ssGIDJve6Aw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-25T20:21:33.541820Z"},"content_sha256":"addacb27fcbde6c14058908436d47fd3f222c34cbba7334ff113d7a2059a62f7","schema_version":"1.0","event_id":"sha256:addacb27fcbde6c14058908436d47fd3f222c34cbba7334ff113d7a2059a62f7"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2015:WULJI2YCTSWPHXLBBOAYYJ2KI2","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Harvesting Entities from the Web Using Unique Identifiers -- IBEX","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.IR"],"primary_cat":"cs.DB","authors_text":"Aliaksandr Talaika, Antoine Amarilli, Fabian M. Suchanek, Joanna Biega","submitted_at":"2015-05-04T23:21:00Z","abstract_excerpt":"In this paper we study the prevalence of unique entity identifiers on the Web. These are, e.g., ISBNs (for books), GTINs (for commercial products), DOIs (for documents), email addresses, and others. We show how these identifiers can be harvested systematically from Web pages, and how they can be associated with human-readable names for the entities at large scale.\n  Starting with a simple extraction of identifiers and names from Web pages, we show how we can use the properties of unique identifiers to filter out noise and clean up the extraction result on the entire corpus. The end result is a"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1505.00841","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T01:11:00Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"jBhPnQitoQ7XHJ0JvOYGkC1aFiTqeDzMbPA6/QbnhNP/Rqv6Zcd3/sb6qYbVhkdx3rsiBEYiglrtl0nSBsRoAQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-25T20:21:33.542533Z"},"content_sha256":"b6e08edfd84ce3552a6fd0a08f0bb246e24fa5b9ec4ffb136808f4307d773149","schema_version":"1.0","event_id":"sha256:b6e08edfd84ce3552a6fd0a08f0bb246e24fa5b9ec4ffb136808f4307d773149"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/WULJI2YCTSWPHXLBBOAYYJ2KI2/bundle.json","state_url":"https://pith.science/pith/WULJI2YCTSWPHXLBBOAYYJ2KI2/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/WULJI2YCTSWPHXLBBOAYYJ2KI2/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-25T20:21:33Z","links":{"resolver":"https://pith.science/pith/WULJI2YCTSWPHXLBBOAYYJ2KI2","bundle":"https://pith.science/pith/WULJI2YCTSWPHXLBBOAYYJ2KI2/bundle.json","state":"https://pith.science/pith/WULJI2YCTSWPHXLBBOAYYJ2KI2/state.json","well_known_bundle":"https://pith.science/.well-known/pith/WULJI2YCTSWPHXLBBOAYYJ2KI2/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2015:WULJI2YCTSWPHXLBBOAYYJ2KI2","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"7171b78e2d0de6d10a46976b92759293220caa41b7e832725035dbd133a469fa","cross_cats_sorted":["cs.IR"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DB","submitted_at":"2015-05-04T23:21:00Z","title_canon_sha256":"74c76b5e919a2eb26edda2358ce487d43cc80dcbfd87c66bd6bd3a0cf432d197"},"schema_version":"1.0","source":{"id":"1505.00841","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1505.00841","created_at":"2026-05-18T01:11:00Z"},{"alias_kind":"arxiv_version","alias_value":"1505.00841v1","created_at":"2026-05-18T01:11:00Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1505.00841","created_at":"2026-05-18T01:11:00Z"},{"alias_kind":"pith_short_12","alias_value":"WULJI2YCTSWP","created_at":"2026-05-18T12:29:47Z"},{"alias_kind":"pith_short_16","alias_value":"WULJI2YCTSWPHXLB","created_at":"2026-05-18T12:29:47Z"},{"alias_kind":"pith_short_8","alias_value":"WULJI2YC","created_at":"2026-05-18T12:29:47Z"}],"graph_snapshots":[{"event_id":"sha256:b6e08edfd84ce3552a6fd0a08f0bb246e24fa5b9ec4ffb136808f4307d773149","target":"graph","created_at":"2026-05-18T01:11:00Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"In this paper we study the prevalence of unique entity identifiers on the Web. These are, e.g., ISBNs (for books), GTINs (for commercial products), DOIs (for documents), email addresses, and others. We show how these identifiers can be harvested systematically from Web pages, and how they can be associated with human-readable names for the entities at large scale.\n  Starting with a simple extraction of identifiers and names from Web pages, we show how we can use the properties of unique identifiers to filter out noise and clean up the extraction result on the entire corpus. The end result is a","authors_text":"Aliaksandr Talaika, Antoine Amarilli, Fabian M. Suchanek, Joanna Biega","cross_cats":["cs.IR"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DB","submitted_at":"2015-05-04T23:21:00Z","title":"Harvesting Entities from the Web Using Unique Identifiers -- IBEX"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1505.00841","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:addacb27fcbde6c14058908436d47fd3f222c34cbba7334ff113d7a2059a62f7","target":"record","created_at":"2026-05-18T01:11:00Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"7171b78e2d0de6d10a46976b92759293220caa41b7e832725035dbd133a469fa","cross_cats_sorted":["cs.IR"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DB","submitted_at":"2015-05-04T23:21:00Z","title_canon_sha256":"74c76b5e919a2eb26edda2358ce487d43cc80dcbfd87c66bd6bd3a0cf432d197"},"schema_version":"1.0","source":{"id":"1505.00841","kind":"arxiv","version":1}},"canonical_sha256":"b516946b029cacf3dd610b818c274a46bee484ba54ff835ac51371f2f31c03ec","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"b516946b029cacf3dd610b818c274a46bee484ba54ff835ac51371f2f31c03ec","first_computed_at":"2026-05-18T01:11:00.151985Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T01:11:00.151985Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"N55AzlndErBDlCgGetC4tu9go39WKjmaDHbR3DY8+Vq6dpksn/kXj5CU9i+VZRuU3r4pme5PKoH7cQn7eWwLAA==","signature_status":"signed_v1","signed_at":"2026-05-18T01:11:00.152635Z","signed_message":"canonical_sha256_bytes"},"source_id":"1505.00841","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:addacb27fcbde6c14058908436d47fd3f222c34cbba7334ff113d7a2059a62f7","sha256:b6e08edfd84ce3552a6fd0a08f0bb246e24fa5b9ec4ffb136808f4307d773149"],"state_sha256":"f47040a40cc393585c2a49e38f4a72590491a2294c6708c9aa731ef0e338eeb4"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"4McyDZobIExTwq4sIV/Je2Jxn7TZR6fbhXkKvlyLLnh/qC5gOKrJs305sB5po7t0L4vRlWElBF4kwdqBuGVQDA==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-25T20:21:33.546592Z","bundle_sha256":"935f108a648dbd8863a205ed2f0c1ddefca302b191513fc3c94bc9a5a2d68f29"}}