{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2010:PAPA2VAOQPOZBWALDRHEPG5OCB","short_pith_number":"pith:PAPA2VAO","canonical_record":{"source":{"id":"1005.4298","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2010-05-24T10:35:50Z","cross_cats_sorted":["cs.IR","cs.LG"],"title_canon_sha256":"35e1bb20bd8b0f835ffdb83d7e491b09a28bece17b04255319d210dbec0a95fe","abstract_canon_sha256":"5da1757df5b6442d74b1c9772dff4c94ca2124d2f732098ec753b73aa49a0ff0"},"schema_version":"1.0"},"canonical_sha256":"781e0d540e83dd90d80b1c4e479bae104b86702beecf22530b64f5786296fe63","source":{"kind":"arxiv","id":"1005.4298","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1005.4298","created_at":"2026-05-18T02:23:56Z"},{"alias_kind":"arxiv_version","alias_value":"1005.4298v1","created_at":"2026-05-18T02:23:56Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1005.4298","created_at":"2026-05-18T02:23:56Z"},{"alias_kind":"pith_short_12","alias_value":"PAPA2VAOQPOZ","created_at":"2026-05-18T12:26:12Z"},{"alias_kind":"pith_short_16","alias_value":"PAPA2VAOQPOZBWAL","created_at":"2026-05-18T12:26:12Z"},{"alias_kind":"pith_short_8","alias_value":"PAPA2VAO","created_at":"2026-05-18T12:26:12Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2010:PAPA2VAOQPOZBWALDRHEPG5OCB","target":"record","payload":{"canonical_record":{"source":{"id":"1005.4298","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2010-05-24T10:35:50Z","cross_cats_sorted":["cs.IR","cs.LG"],"title_canon_sha256":"35e1bb20bd8b0f835ffdb83d7e491b09a28bece17b04255319d210dbec0a95fe","abstract_canon_sha256":"5da1757df5b6442d74b1c9772dff4c94ca2124d2f732098ec753b73aa49a0ff0"},"schema_version":"1.0"},"canonical_sha256":"781e0d540e83dd90d80b1c4e479bae104b86702beecf22530b64f5786296fe63","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T02:23:56.231007Z","signature_b64":"MD19Xgq1j+fozR8iwucLlM/zyF5vChrIgsaJHvA9k9gqU7LwnNY4Rk3Er20BzjbhVtSNpiGhVhnOlVw3HxJCDA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"781e0d540e83dd90d80b1c4e479bae104b86702beecf22530b64f5786296fe63","last_reissued_at":"2026-05-18T02:23:56.230329Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T02:23:56.230329Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1005.4298","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T02:23:56Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"jHGG99eGFbHY9+YReBGBOU7MQjaKZHyFP5TPSBghI8qEmgx/1M+YrsFqNctJMA+VwB/WIkug970WpAOGQwAxAw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-27T21:20:44.342302Z"},"content_sha256":"90aa537bfd035aa713d8d4dcd63e898a74768f203754030f43bb1e27d46b7331","schema_version":"1.0","event_id":"sha256:90aa537bfd035aa713d8d4dcd63e898a74768f203754030f43bb1e27d46b7331"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2010:PAPA2VAOQPOZBWALDRHEPG5OCB","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Distantly Labeling Data for Large Scale Cross-Document Coreference","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.IR","cs.LG"],"primary_cat":"cs.AI","authors_text":"Andrew McCallum, Michael Wick, Sameer Singh","submitted_at":"2010-05-24T10:35:50Z","abstract_excerpt":"Cross-document coreference, the problem of resolving entity mentions across multi-document collections, is crucial to automated knowledge base construction and data mining tasks. However, the scarcity of large labeled data sets has hindered supervised machine learning research for this task. In this paper we develop and demonstrate an approach based on ``distantly-labeling'' a data set from which we can train a discriminative cross-document coreference model. In particular we build a dataset of more than a million people mentions extracted from 3.5 years of New York Times articles, leverage Wi"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1005.4298","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T02:23:56Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"wAS4EPnl4XiQeOONWBJDoDQ+Wrqn4hPfRoTezHHkFoI25M+VpIt7nVpsviRmzO7EWBjKogMHmaQsMPGWJydEAw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-27T21:20:44.342671Z"},"content_sha256":"11edc05a91b4d35a4540360dcd0bf642022c5699d3ee557185f461312ba74a49","schema_version":"1.0","event_id":"sha256:11edc05a91b4d35a4540360dcd0bf642022c5699d3ee557185f461312ba74a49"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/PAPA2VAOQPOZBWALDRHEPG5OCB/bundle.json","state_url":"https://pith.science/pith/PAPA2VAOQPOZBWALDRHEPG5OCB/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/PAPA2VAOQPOZBWALDRHEPG5OCB/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-27T21:20:44Z","links":{"resolver":"https://pith.science/pith/PAPA2VAOQPOZBWALDRHEPG5OCB","bundle":"https://pith.science/pith/PAPA2VAOQPOZBWALDRHEPG5OCB/bundle.json","state":"https://pith.science/pith/PAPA2VAOQPOZBWALDRHEPG5OCB/state.json","well_known_bundle":"https://pith.science/.well-known/pith/PAPA2VAOQPOZBWALDRHEPG5OCB/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2010:PAPA2VAOQPOZBWALDRHEPG5OCB","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"5da1757df5b6442d74b1c9772dff4c94ca2124d2f732098ec753b73aa49a0ff0","cross_cats_sorted":["cs.IR","cs.LG"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2010-05-24T10:35:50Z","title_canon_sha256":"35e1bb20bd8b0f835ffdb83d7e491b09a28bece17b04255319d210dbec0a95fe"},"schema_version":"1.0","source":{"id":"1005.4298","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1005.4298","created_at":"2026-05-18T02:23:56Z"},{"alias_kind":"arxiv_version","alias_value":"1005.4298v1","created_at":"2026-05-18T02:23:56Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1005.4298","created_at":"2026-05-18T02:23:56Z"},{"alias_kind":"pith_short_12","alias_value":"PAPA2VAOQPOZ","created_at":"2026-05-18T12:26:12Z"},{"alias_kind":"pith_short_16","alias_value":"PAPA2VAOQPOZBWAL","created_at":"2026-05-18T12:26:12Z"},{"alias_kind":"pith_short_8","alias_value":"PAPA2VAO","created_at":"2026-05-18T12:26:12Z"}],"graph_snapshots":[{"event_id":"sha256:11edc05a91b4d35a4540360dcd0bf642022c5699d3ee557185f461312ba74a49","target":"graph","created_at":"2026-05-18T02:23:56Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Cross-document coreference, the problem of resolving entity mentions across multi-document collections, is crucial to automated knowledge base construction and data mining tasks. However, the scarcity of large labeled data sets has hindered supervised machine learning research for this task. In this paper we develop and demonstrate an approach based on ``distantly-labeling'' a data set from which we can train a discriminative cross-document coreference model. In particular we build a dataset of more than a million people mentions extracted from 3.5 years of New York Times articles, leverage Wi","authors_text":"Andrew McCallum, Michael Wick, Sameer Singh","cross_cats":["cs.IR","cs.LG"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2010-05-24T10:35:50Z","title":"Distantly Labeling Data for Large Scale Cross-Document Coreference"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1005.4298","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:90aa537bfd035aa713d8d4dcd63e898a74768f203754030f43bb1e27d46b7331","target":"record","created_at":"2026-05-18T02:23:56Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"5da1757df5b6442d74b1c9772dff4c94ca2124d2f732098ec753b73aa49a0ff0","cross_cats_sorted":["cs.IR","cs.LG"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2010-05-24T10:35:50Z","title_canon_sha256":"35e1bb20bd8b0f835ffdb83d7e491b09a28bece17b04255319d210dbec0a95fe"},"schema_version":"1.0","source":{"id":"1005.4298","kind":"arxiv","version":1}},"canonical_sha256":"781e0d540e83dd90d80b1c4e479bae104b86702beecf22530b64f5786296fe63","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"781e0d540e83dd90d80b1c4e479bae104b86702beecf22530b64f5786296fe63","first_computed_at":"2026-05-18T02:23:56.230329Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T02:23:56.230329Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"MD19Xgq1j+fozR8iwucLlM/zyF5vChrIgsaJHvA9k9gqU7LwnNY4Rk3Er20BzjbhVtSNpiGhVhnOlVw3HxJCDA==","signature_status":"signed_v1","signed_at":"2026-05-18T02:23:56.231007Z","signed_message":"canonical_sha256_bytes"},"source_id":"1005.4298","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:90aa537bfd035aa713d8d4dcd63e898a74768f203754030f43bb1e27d46b7331","sha256:11edc05a91b4d35a4540360dcd0bf642022c5699d3ee557185f461312ba74a49"],"state_sha256":"48caf77be8db28e2ae342bd54da8448498f7e4a30c7735c552de670e486db579"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"lkYRb3+hKGuwBH1w9LUUmCMb9G5j+pK5qNVau4HlDrqCBCgr1bedQ/3tbQ5JeQpfGGw0AlPgXTsXlW/1BtGcCg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-27T21:20:44.345630Z","bundle_sha256":"bb4892d3a60622abc5b6a742c656987ce0d909a7f7e81632ae23cc0abe1a519a"}}