{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2015:VHZF4ARAQQ7TGFDDPHOZSA6GUB","short_pith_number":"pith:VHZF4ARA","schema_version":"1.0","canonical_sha256":"a9f25e0220843f33146379dd9903c6a04ae0caded7502feffc3cbb4c4efb17d1","source":{"kind":"arxiv","id":"1512.04973","version":1},"attestation_state":"computed","paper":{"title":"An Operator for Entity Extraction in MapReduce","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.CL"],"primary_cat":"cs.DB","authors_text":"Ndapandula Nakashole","submitted_at":"2015-12-15T21:23:20Z","abstract_excerpt":"Dictionary-based entity extraction involves finding mentions of dictionary entities in text. Text mentions are often noisy, containing spurious or missing words. Efficient algorithms for detecting approximate entity mentions follow one of two general techniques. The first approach is to build an index on the entities and perform index lookups of document substrings. The second approach recognizes that the number of substrings generated from documents can explode to large numbers, to get around this, they use a filter to prune many such substrings which do not match any dictionary entity and th"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1512.04973","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DB","submitted_at":"2015-12-15T21:23:20Z","cross_cats_sorted":["cs.CL"],"title_canon_sha256":"3cb022d85117e7d6882d0038bc0e7ecdbdb1db18330d44c7cbf0ea62bf04f4dc","abstract_canon_sha256":"137f184dcfde38626f98970b3c1cf022382ac8cdb47420ce4473ad2ae5ddcdd2"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T01:24:13.866558Z","signature_b64":"UspSa6ccNO1mcmgqXpGfKUH+JvaEl8Ooc6EARxtCsouYl0utNl3jUAFPvD+ZWLlZWObTArwAu6LQz51cG0SHAA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"a9f25e0220843f33146379dd9903c6a04ae0caded7502feffc3cbb4c4efb17d1","last_reissued_at":"2026-05-18T01:24:13.866118Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T01:24:13.866118Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"An Operator for Entity Extraction in MapReduce","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.CL"],"primary_cat":"cs.DB","authors_text":"Ndapandula Nakashole","submitted_at":"2015-12-15T21:23:20Z","abstract_excerpt":"Dictionary-based entity extraction involves finding mentions of dictionary entities in text. Text mentions are often noisy, containing spurious or missing words. Efficient algorithms for detecting approximate entity mentions follow one of two general techniques. The first approach is to build an index on the entities and perform index lookups of document substrings. The second approach recognizes that the number of substrings generated from documents can explode to large numbers, to get around this, they use a filter to prune many such substrings which do not match any dictionary entity and th"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1512.04973","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1512.04973","created_at":"2026-05-18T01:24:13.866179+00:00"},{"alias_kind":"arxiv_version","alias_value":"1512.04973v1","created_at":"2026-05-18T01:24:13.866179+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1512.04973","created_at":"2026-05-18T01:24:13.866179+00:00"},{"alias_kind":"pith_short_12","alias_value":"VHZF4ARAQQ7T","created_at":"2026-05-18T12:29:44.643036+00:00"},{"alias_kind":"pith_short_16","alias_value":"VHZF4ARAQQ7TGFDD","created_at":"2026-05-18T12:29:44.643036+00:00"},{"alias_kind":"pith_short_8","alias_value":"VHZF4ARA","created_at":"2026-05-18T12:29:44.643036+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/VHZF4ARAQQ7TGFDDPHOZSA6GUB","json":"https://pith.science/pith/VHZF4ARAQQ7TGFDDPHOZSA6GUB.json","graph_json":"https://pith.science/api/pith-number/VHZF4ARAQQ7TGFDDPHOZSA6GUB/graph.json","events_json":"https://pith.science/api/pith-number/VHZF4ARAQQ7TGFDDPHOZSA6GUB/events.json","paper":"https://pith.science/paper/VHZF4ARA"},"agent_actions":{"view_html":"https://pith.science/pith/VHZF4ARAQQ7TGFDDPHOZSA6GUB","download_json":"https://pith.science/pith/VHZF4ARAQQ7TGFDDPHOZSA6GUB.json","view_paper":"https://pith.science/paper/VHZF4ARA","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1512.04973&json=true","fetch_graph":"https://pith.science/api/pith-number/VHZF4ARAQQ7TGFDDPHOZSA6GUB/graph.json","fetch_events":"https://pith.science/api/pith-number/VHZF4ARAQQ7TGFDDPHOZSA6GUB/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/VHZF4ARAQQ7TGFDDPHOZSA6GUB/action/timestamp_anchor","attest_storage":"https://pith.science/pith/VHZF4ARAQQ7TGFDDPHOZSA6GUB/action/storage_attestation","attest_author":"https://pith.science/pith/VHZF4ARAQQ7TGFDDPHOZSA6GUB/action/author_attestation","sign_citation":"https://pith.science/pith/VHZF4ARAQQ7TGFDDPHOZSA6GUB/action/citation_signature","submit_replication":"https://pith.science/pith/VHZF4ARAQQ7TGFDDPHOZSA6GUB/action/replication_record"}},"created_at":"2026-05-18T01:24:13.866179+00:00","updated_at":"2026-05-18T01:24:13.866179+00:00"}