{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2017:OE3SCJBRQELVXXPEZOY5KCBU57","short_pith_number":"pith:OE3SCJBR","schema_version":"1.0","canonical_sha256":"713721243181175bdde4cbb1d50834effa88fa94527ce80824cf2e34614ab032","source":{"kind":"arxiv","id":"1711.05186","version":2},"attestation_state":"computed","paper":{"title":"False Positive and Cross-relation Signals in Distant Supervision Data","license":"http://creativecommons.org/licenses/by-sa/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Anca Dumitrache, Chris Welty, Lora Aroyo","submitted_at":"2017-11-14T16:50:40Z","abstract_excerpt":"Distant supervision (DS) is a well-established method for relation extraction from text, based on the assumption that when a knowledge-base contains a relation between a term pair, then sentences that contain that pair are likely to express the relation. In this paper, we use the results of a crowdsourcing relation extraction task to identify two problems with DS data quality: the widely varying degree of false positives across different relations, and the observed causal connection between relations that are not considered by the DS method. The crowdsourcing data aggregation is performed usin"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1711.05186","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by-sa/4.0/","primary_cat":"cs.CL","submitted_at":"2017-11-14T16:50:40Z","cross_cats_sorted":[],"title_canon_sha256":"3937f6c889c861d7bff422fc3ef56f4d10a5b994b55ec60cc55d618900d41545","abstract_canon_sha256":"8ed731f0df80896868798ad08c22ad9db9c1650a4652422ee5f7e40c92f9466e"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:29:12.830940Z","signature_b64":"18wkax8x+FidsF840ImKsqt2eC5bFqL/kNnWdeMysxiI3W24zEh7NXbW349p+zZKCX44dKXqss+IBFwKkupjBA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"713721243181175bdde4cbb1d50834effa88fa94527ce80824cf2e34614ab032","last_reissued_at":"2026-05-18T00:29:12.830388Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:29:12.830388Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"False Positive and Cross-relation Signals in Distant Supervision Data","license":"http://creativecommons.org/licenses/by-sa/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Anca Dumitrache, Chris Welty, Lora Aroyo","submitted_at":"2017-11-14T16:50:40Z","abstract_excerpt":"Distant supervision (DS) is a well-established method for relation extraction from text, based on the assumption that when a knowledge-base contains a relation between a term pair, then sentences that contain that pair are likely to express the relation. In this paper, we use the results of a crowdsourcing relation extraction task to identify two problems with DS data quality: the widely varying degree of false positives across different relations, and the observed causal connection between relations that are not considered by the DS method. The crowdsourcing data aggregation is performed usin"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1711.05186","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1711.05186","created_at":"2026-05-18T00:29:12.830488+00:00"},{"alias_kind":"arxiv_version","alias_value":"1711.05186v2","created_at":"2026-05-18T00:29:12.830488+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1711.05186","created_at":"2026-05-18T00:29:12.830488+00:00"},{"alias_kind":"pith_short_12","alias_value":"OE3SCJBRQELV","created_at":"2026-05-18T12:31:34.259226+00:00"},{"alias_kind":"pith_short_16","alias_value":"OE3SCJBRQELVXXPE","created_at":"2026-05-18T12:31:34.259226+00:00"},{"alias_kind":"pith_short_8","alias_value":"OE3SCJBR","created_at":"2026-05-18T12:31:34.259226+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/OE3SCJBRQELVXXPEZOY5KCBU57","json":"https://pith.science/pith/OE3SCJBRQELVXXPEZOY5KCBU57.json","graph_json":"https://pith.science/api/pith-number/OE3SCJBRQELVXXPEZOY5KCBU57/graph.json","events_json":"https://pith.science/api/pith-number/OE3SCJBRQELVXXPEZOY5KCBU57/events.json","paper":"https://pith.science/paper/OE3SCJBR"},"agent_actions":{"view_html":"https://pith.science/pith/OE3SCJBRQELVXXPEZOY5KCBU57","download_json":"https://pith.science/pith/OE3SCJBRQELVXXPEZOY5KCBU57.json","view_paper":"https://pith.science/paper/OE3SCJBR","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1711.05186&json=true","fetch_graph":"https://pith.science/api/pith-number/OE3SCJBRQELVXXPEZOY5KCBU57/graph.json","fetch_events":"https://pith.science/api/pith-number/OE3SCJBRQELVXXPEZOY5KCBU57/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/OE3SCJBRQELVXXPEZOY5KCBU57/action/timestamp_anchor","attest_storage":"https://pith.science/pith/OE3SCJBRQELVXXPEZOY5KCBU57/action/storage_attestation","attest_author":"https://pith.science/pith/OE3SCJBRQELVXXPEZOY5KCBU57/action/author_attestation","sign_citation":"https://pith.science/pith/OE3SCJBRQELVXXPEZOY5KCBU57/action/citation_signature","submit_replication":"https://pith.science/pith/OE3SCJBRQELVXXPEZOY5KCBU57/action/replication_record"}},"created_at":"2026-05-18T00:29:12.830488+00:00","updated_at":"2026-05-18T00:29:12.830488+00:00"}