{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2012:J5ON7RME6I5ZF5QWBE7C6FJQTP","short_pith_number":"pith:J5ON7RME","canonical_record":{"source":{"id":"1208.1860","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DB","submitted_at":"2012-08-09T10:02:35Z","cross_cats_sorted":["cs.LG"],"title_canon_sha256":"291b38f9db396fecf513ae0ea29ba831ed01af5cc98443f4efdd93f700beda1b","abstract_canon_sha256":"733a92cbc459fcb7fb65c2002232def900d5cc0a13b2836fcf9072a8c6267365"},"schema_version":"1.0"},"canonical_sha256":"4f5cdfc584f23b92f616093e2f15309bf2a7fccb4ebf2b950045cffc5926c25a","source":{"kind":"arxiv","id":"1208.1860","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1208.1860","created_at":"2026-05-18T03:49:09Z"},{"alias_kind":"arxiv_version","alias_value":"1208.1860v1","created_at":"2026-05-18T03:49:09Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1208.1860","created_at":"2026-05-18T03:49:09Z"},{"alias_kind":"pith_short_12","alias_value":"J5ON7RME6I5Z","created_at":"2026-05-18T12:27:09Z"},{"alias_kind":"pith_short_16","alias_value":"J5ON7RME6I5ZF5QW","created_at":"2026-05-18T12:27:09Z"},{"alias_kind":"pith_short_8","alias_value":"J5ON7RME","created_at":"2026-05-18T12:27:09Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2012:J5ON7RME6I5ZF5QWBE7C6FJQTP","target":"record","payload":{"canonical_record":{"source":{"id":"1208.1860","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DB","submitted_at":"2012-08-09T10:02:35Z","cross_cats_sorted":["cs.LG"],"title_canon_sha256":"291b38f9db396fecf513ae0ea29ba831ed01af5cc98443f4efdd93f700beda1b","abstract_canon_sha256":"733a92cbc459fcb7fb65c2002232def900d5cc0a13b2836fcf9072a8c6267365"},"schema_version":"1.0"},"canonical_sha256":"4f5cdfc584f23b92f616093e2f15309bf2a7fccb4ebf2b950045cffc5926c25a","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T03:49:09.812563Z","signature_b64":"ZsGCy9HRRqXPGhoXnF7rvpTBC+4aeT9ObtInJEeUVyLUqaLLK88hxVQ55wQW9yopA/UdgQADoi2G6gRcu0yeDA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"4f5cdfc584f23b92f616093e2f15309bf2a7fccb4ebf2b950045cffc5926c25a","last_reissued_at":"2026-05-18T03:49:09.811999Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T03:49:09.811999Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1208.1860","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T03:49:09Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"sy93FgI9XMoo1Z0QrpJ73CHJigqco7ZDNv5Bv2nJK9nyepOsTD1MgoFyQp/oREjaOzG+DTsogH0CD1lW8yqiAg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-11T16:21:49.813992Z"},"content_sha256":"e34abab065f01af13dab8e118912bc58b11d806608dbefda8404e1aaf687e570","schema_version":"1.0","event_id":"sha256:e34abab065f01af13dab8e118912bc58b11d806608dbefda8404e1aaf687e570"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2012:J5ON7RME6I5ZF5QWBE7C6FJQTP","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Scaling Multiple-Source Entity Resolution using Statistically Efficient Transfer Learning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.LG"],"primary_cat":"cs.DB","authors_text":"Benjamin I. P. Rubinstein, Jim Gemmell, Sahand Negahban","submitted_at":"2012-08-09T10:02:35Z","abstract_excerpt":"We consider a serious, previously-unexplored challenge facing almost all approaches to scaling up entity resolution (ER) to multiple data sources: the prohibitive cost of labeling training data for supervised learning of similarity scores for each pair of sources. While there exists a rich literature describing almost all aspects of pairwise ER, this new challenge is arising now due to the unprecedented ability to acquire and store data from online sources, features driven by ER such as enriched search verticals, and the uniqueness of noisy and missing data characteristics for each source. We "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1208.1860","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T03:49:09Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"uWpYOU7u66jeDeJdSXxwX4R+APxIH7aSIwkf1Xm29na52z/Dh9dKWz8nt/azJzzuhwekwROV3V5CHKamMc+yBw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-11T16:21:49.814688Z"},"content_sha256":"084cf53b358df820b11f0a2f25ff049b81ac34958f45ee0d0ffe3cd5fea5f13f","schema_version":"1.0","event_id":"sha256:084cf53b358df820b11f0a2f25ff049b81ac34958f45ee0d0ffe3cd5fea5f13f"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/J5ON7RME6I5ZF5QWBE7C6FJQTP/bundle.json","state_url":"https://pith.science/pith/J5ON7RME6I5ZF5QWBE7C6FJQTP/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/J5ON7RME6I5ZF5QWBE7C6FJQTP/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-11T16:21:49Z","links":{"resolver":"https://pith.science/pith/J5ON7RME6I5ZF5QWBE7C6FJQTP","bundle":"https://pith.science/pith/J5ON7RME6I5ZF5QWBE7C6FJQTP/bundle.json","state":"https://pith.science/pith/J5ON7RME6I5ZF5QWBE7C6FJQTP/state.json","well_known_bundle":"https://pith.science/.well-known/pith/J5ON7RME6I5ZF5QWBE7C6FJQTP/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2012:J5ON7RME6I5ZF5QWBE7C6FJQTP","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"733a92cbc459fcb7fb65c2002232def900d5cc0a13b2836fcf9072a8c6267365","cross_cats_sorted":["cs.LG"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DB","submitted_at":"2012-08-09T10:02:35Z","title_canon_sha256":"291b38f9db396fecf513ae0ea29ba831ed01af5cc98443f4efdd93f700beda1b"},"schema_version":"1.0","source":{"id":"1208.1860","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1208.1860","created_at":"2026-05-18T03:49:09Z"},{"alias_kind":"arxiv_version","alias_value":"1208.1860v1","created_at":"2026-05-18T03:49:09Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1208.1860","created_at":"2026-05-18T03:49:09Z"},{"alias_kind":"pith_short_12","alias_value":"J5ON7RME6I5Z","created_at":"2026-05-18T12:27:09Z"},{"alias_kind":"pith_short_16","alias_value":"J5ON7RME6I5ZF5QW","created_at":"2026-05-18T12:27:09Z"},{"alias_kind":"pith_short_8","alias_value":"J5ON7RME","created_at":"2026-05-18T12:27:09Z"}],"graph_snapshots":[{"event_id":"sha256:084cf53b358df820b11f0a2f25ff049b81ac34958f45ee0d0ffe3cd5fea5f13f","target":"graph","created_at":"2026-05-18T03:49:09Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"We consider a serious, previously-unexplored challenge facing almost all approaches to scaling up entity resolution (ER) to multiple data sources: the prohibitive cost of labeling training data for supervised learning of similarity scores for each pair of sources. While there exists a rich literature describing almost all aspects of pairwise ER, this new challenge is arising now due to the unprecedented ability to acquire and store data from online sources, features driven by ER such as enriched search verticals, and the uniqueness of noisy and missing data characteristics for each source. We ","authors_text":"Benjamin I. P. Rubinstein, Jim Gemmell, Sahand Negahban","cross_cats":["cs.LG"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DB","submitted_at":"2012-08-09T10:02:35Z","title":"Scaling Multiple-Source Entity Resolution using Statistically Efficient Transfer Learning"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1208.1860","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:e34abab065f01af13dab8e118912bc58b11d806608dbefda8404e1aaf687e570","target":"record","created_at":"2026-05-18T03:49:09Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"733a92cbc459fcb7fb65c2002232def900d5cc0a13b2836fcf9072a8c6267365","cross_cats_sorted":["cs.LG"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DB","submitted_at":"2012-08-09T10:02:35Z","title_canon_sha256":"291b38f9db396fecf513ae0ea29ba831ed01af5cc98443f4efdd93f700beda1b"},"schema_version":"1.0","source":{"id":"1208.1860","kind":"arxiv","version":1}},"canonical_sha256":"4f5cdfc584f23b92f616093e2f15309bf2a7fccb4ebf2b950045cffc5926c25a","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"4f5cdfc584f23b92f616093e2f15309bf2a7fccb4ebf2b950045cffc5926c25a","first_computed_at":"2026-05-18T03:49:09.811999Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T03:49:09.811999Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"ZsGCy9HRRqXPGhoXnF7rvpTBC+4aeT9ObtInJEeUVyLUqaLLK88hxVQ55wQW9yopA/UdgQADoi2G6gRcu0yeDA==","signature_status":"signed_v1","signed_at":"2026-05-18T03:49:09.812563Z","signed_message":"canonical_sha256_bytes"},"source_id":"1208.1860","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:e34abab065f01af13dab8e118912bc58b11d806608dbefda8404e1aaf687e570","sha256:084cf53b358df820b11f0a2f25ff049b81ac34958f45ee0d0ffe3cd5fea5f13f"],"state_sha256":"2fd29b2675d2475a2268d17053b600c5f61953fe5753cbc839738848a5e9cc8f"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"KbYUP3KDE8imcP+vXIhKn5mhJSL+HfOk1KsUf3zumbEx4b9Qb/58hLU7wBj4Gn3RKRWiX9KuiRKRzpygHMF4BA==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-11T16:21:49.818759Z","bundle_sha256":"03767ad6b454a43dcb8cceae31b54e09d9b62f93d241686d15033b74e72bb6a7"}}