{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2015:3WCPSNP4CXSC2EE6FJYO2RM4JC","short_pith_number":"pith:3WCPSNP4","schema_version":"1.0","canonical_sha256":"dd84f935fc15e42d109e2a70ed459c489efebb57bd4baef6ace8df4cc5f25045","source":{"kind":"arxiv","id":"1503.00309","version":1},"attestation_state":"computed","paper":{"title":"Scaling up Copy Detection","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.DB","authors_text":"Divesh Srivastava, Kenneth B. Lyons, Weiyi Meng, Xian Li, Xin Luna Dong","submitted_at":"2015-03-01T17:00:29Z","abstract_excerpt":"Recent research shows that copying is prevalent for Deep-Web data and considering copying can significantly improve truth finding from conflicting values. However, existing copy detection techniques do not scale for large sizes and numbers of data sources, so truth finding can be slowed down by one to two orders of magnitude compared with the corresponding techniques that do not consider copying. In this paper, we study {\\em how to improve scalability of copy detection on structured data}.\n  Our algorithm builds an inverted index for each \\emph{shared} value and processes the index entries in "},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1503.00309","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DB","submitted_at":"2015-03-01T17:00:29Z","cross_cats_sorted":[],"title_canon_sha256":"01c9f27df44c45914c8f7a0fbdff4c3064ce13bf2c0b61f6e54639cfb0b494d1","abstract_canon_sha256":"2b07b5d7b1a689754bcc66ea58f428130b35f7af2209f6cab2a0e032c82ba63b"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T02:25:54.703580Z","signature_b64":"/9XlAapGE2GBDngZkNSFkr0+IZq1FyyGIlW6Wwv2eqY+7MyVvhpUMiM4nyMxGfNMP9B+G7aj/0HMBfSSmAoJAg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"dd84f935fc15e42d109e2a70ed459c489efebb57bd4baef6ace8df4cc5f25045","last_reissued_at":"2026-05-18T02:25:54.703157Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T02:25:54.703157Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Scaling up Copy Detection","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.DB","authors_text":"Divesh Srivastava, Kenneth B. Lyons, Weiyi Meng, Xian Li, Xin Luna Dong","submitted_at":"2015-03-01T17:00:29Z","abstract_excerpt":"Recent research shows that copying is prevalent for Deep-Web data and considering copying can significantly improve truth finding from conflicting values. However, existing copy detection techniques do not scale for large sizes and numbers of data sources, so truth finding can be slowed down by one to two orders of magnitude compared with the corresponding techniques that do not consider copying. In this paper, we study {\\em how to improve scalability of copy detection on structured data}.\n  Our algorithm builds an inverted index for each \\emph{shared} value and processes the index entries in "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1503.00309","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1503.00309","created_at":"2026-05-18T02:25:54.703228+00:00"},{"alias_kind":"arxiv_version","alias_value":"1503.00309v1","created_at":"2026-05-18T02:25:54.703228+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1503.00309","created_at":"2026-05-18T02:25:54.703228+00:00"},{"alias_kind":"pith_short_12","alias_value":"3WCPSNP4CXSC","created_at":"2026-05-18T12:29:02.477457+00:00"},{"alias_kind":"pith_short_16","alias_value":"3WCPSNP4CXSC2EE6","created_at":"2026-05-18T12:29:02.477457+00:00"},{"alias_kind":"pith_short_8","alias_value":"3WCPSNP4","created_at":"2026-05-18T12:29:02.477457+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/3WCPSNP4CXSC2EE6FJYO2RM4JC","json":"https://pith.science/pith/3WCPSNP4CXSC2EE6FJYO2RM4JC.json","graph_json":"https://pith.science/api/pith-number/3WCPSNP4CXSC2EE6FJYO2RM4JC/graph.json","events_json":"https://pith.science/api/pith-number/3WCPSNP4CXSC2EE6FJYO2RM4JC/events.json","paper":"https://pith.science/paper/3WCPSNP4"},"agent_actions":{"view_html":"https://pith.science/pith/3WCPSNP4CXSC2EE6FJYO2RM4JC","download_json":"https://pith.science/pith/3WCPSNP4CXSC2EE6FJYO2RM4JC.json","view_paper":"https://pith.science/paper/3WCPSNP4","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1503.00309&json=true","fetch_graph":"https://pith.science/api/pith-number/3WCPSNP4CXSC2EE6FJYO2RM4JC/graph.json","fetch_events":"https://pith.science/api/pith-number/3WCPSNP4CXSC2EE6FJYO2RM4JC/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/3WCPSNP4CXSC2EE6FJYO2RM4JC/action/timestamp_anchor","attest_storage":"https://pith.science/pith/3WCPSNP4CXSC2EE6FJYO2RM4JC/action/storage_attestation","attest_author":"https://pith.science/pith/3WCPSNP4CXSC2EE6FJYO2RM4JC/action/author_attestation","sign_citation":"https://pith.science/pith/3WCPSNP4CXSC2EE6FJYO2RM4JC/action/citation_signature","submit_replication":"https://pith.science/pith/3WCPSNP4CXSC2EE6FJYO2RM4JC/action/replication_record"}},"created_at":"2026-05-18T02:25:54.703228+00:00","updated_at":"2026-05-18T02:25:54.703228+00:00"}