{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2017:OSPG5CNLLX3S6TJYP57XORBK3Q","short_pith_number":"pith:OSPG5CNL","canonical_record":{"source":{"id":"1701.02877","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by-sa/4.0/","primary_cat":"cs.CL","submitted_at":"2017-01-11T08:02:40Z","cross_cats_sorted":[],"title_canon_sha256":"0e888680daf587aae36157758efe09e7580f71819ac0061de68cef23eb79fca5","abstract_canon_sha256":"d5b5ce3cc41c4e175af9ddaab904d44a3219995da5624c06c7d6401edba19e92"},"schema_version":"1.0"},"canonical_sha256":"749e6e89ab5df72f4d387f7f77442adc056254b0c12352e020d0c2ae2b6b4812","source":{"kind":"arxiv","id":"1701.02877","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1701.02877","created_at":"2026-05-18T00:49:06Z"},{"alias_kind":"arxiv_version","alias_value":"1701.02877v2","created_at":"2026-05-18T00:49:06Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1701.02877","created_at":"2026-05-18T00:49:06Z"},{"alias_kind":"pith_short_12","alias_value":"OSPG5CNLLX3S","created_at":"2026-05-18T12:31:34Z"},{"alias_kind":"pith_short_16","alias_value":"OSPG5CNLLX3S6TJY","created_at":"2026-05-18T12:31:34Z"},{"alias_kind":"pith_short_8","alias_value":"OSPG5CNL","created_at":"2026-05-18T12:31:34Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2017:OSPG5CNLLX3S6TJYP57XORBK3Q","target":"record","payload":{"canonical_record":{"source":{"id":"1701.02877","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by-sa/4.0/","primary_cat":"cs.CL","submitted_at":"2017-01-11T08:02:40Z","cross_cats_sorted":[],"title_canon_sha256":"0e888680daf587aae36157758efe09e7580f71819ac0061de68cef23eb79fca5","abstract_canon_sha256":"d5b5ce3cc41c4e175af9ddaab904d44a3219995da5624c06c7d6401edba19e92"},"schema_version":"1.0"},"canonical_sha256":"749e6e89ab5df72f4d387f7f77442adc056254b0c12352e020d0c2ae2b6b4812","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:49:06.325295Z","signature_b64":"H/zGUBP4g6QQ77rUhBFwjZGdOL578yeBETs8/4m02+saN3NRgML8lXYMu1mNu20wuVkx8ItBS5jIzieWyjvgAQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"749e6e89ab5df72f4d387f7f77442adc056254b0c12352e020d0c2ae2b6b4812","last_reissued_at":"2026-05-18T00:49:06.324890Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:49:06.324890Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1701.02877","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:49:06Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"sf6ZLVfOKsZpys6kCrtJH1reSRnIsSAp9EaZwFaMEKzjrLsNI1WRaLQyG0DjqguC3CLxauLzQSgNs5ro5B/BCQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-27T20:21:49.155448Z"},"content_sha256":"0c54727a830e96fe54ad9508eb5c65ca24a489d8cab9a69635e33217e37c3865","schema_version":"1.0","event_id":"sha256:0c54727a830e96fe54ad9508eb5c65ca24a489d8cab9a69635e33217e37c3865"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2017:OSPG5CNLLX3S6TJYP57XORBK3Q","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Generalisation in Named Entity Recognition: A Quantitative Analysis","license":"http://creativecommons.org/licenses/by-sa/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Isabelle Augenstein, Kalina Bontcheva, Leon Derczynski","submitted_at":"2017-01-11T08:02:40Z","abstract_excerpt":"Named Entity Recognition (NER) is a key NLP task, which is all the more challenging on Web and user-generated content with their diverse and continuously changing language. This paper aims to quantify how this diversity impacts state-of-the-art NER methods, by measuring named entity (NE) and context variability, feature sparsity, and their effects on precision and recall. In particular, our findings indicate that NER approaches struggle to generalise in diverse genres with limited training data. Unseen NEs, in particular, play an important role, which have a higher incidence in diverse genres "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1701.02877","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:49:06Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"78A8AL+vzN7AH61Ncxs2RRjjv8z13dXGQbjzz72Yu8mFfyjpbsmaO6NrTMqzFj4cyO6/mhN3CCaNLECd2KG0CA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-27T20:21:49.155915Z"},"content_sha256":"82af2ebfa8640f0606442f2b44c214ca95a01bad0086a96552f706a8ea7694c7","schema_version":"1.0","event_id":"sha256:82af2ebfa8640f0606442f2b44c214ca95a01bad0086a96552f706a8ea7694c7"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/OSPG5CNLLX3S6TJYP57XORBK3Q/bundle.json","state_url":"https://pith.science/pith/OSPG5CNLLX3S6TJYP57XORBK3Q/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/OSPG5CNLLX3S6TJYP57XORBK3Q/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-27T20:21:49Z","links":{"resolver":"https://pith.science/pith/OSPG5CNLLX3S6TJYP57XORBK3Q","bundle":"https://pith.science/pith/OSPG5CNLLX3S6TJYP57XORBK3Q/bundle.json","state":"https://pith.science/pith/OSPG5CNLLX3S6TJYP57XORBK3Q/state.json","well_known_bundle":"https://pith.science/.well-known/pith/OSPG5CNLLX3S6TJYP57XORBK3Q/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2017:OSPG5CNLLX3S6TJYP57XORBK3Q","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"d5b5ce3cc41c4e175af9ddaab904d44a3219995da5624c06c7d6401edba19e92","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by-sa/4.0/","primary_cat":"cs.CL","submitted_at":"2017-01-11T08:02:40Z","title_canon_sha256":"0e888680daf587aae36157758efe09e7580f71819ac0061de68cef23eb79fca5"},"schema_version":"1.0","source":{"id":"1701.02877","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1701.02877","created_at":"2026-05-18T00:49:06Z"},{"alias_kind":"arxiv_version","alias_value":"1701.02877v2","created_at":"2026-05-18T00:49:06Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1701.02877","created_at":"2026-05-18T00:49:06Z"},{"alias_kind":"pith_short_12","alias_value":"OSPG5CNLLX3S","created_at":"2026-05-18T12:31:34Z"},{"alias_kind":"pith_short_16","alias_value":"OSPG5CNLLX3S6TJY","created_at":"2026-05-18T12:31:34Z"},{"alias_kind":"pith_short_8","alias_value":"OSPG5CNL","created_at":"2026-05-18T12:31:34Z"}],"graph_snapshots":[{"event_id":"sha256:82af2ebfa8640f0606442f2b44c214ca95a01bad0086a96552f706a8ea7694c7","target":"graph","created_at":"2026-05-18T00:49:06Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Named Entity Recognition (NER) is a key NLP task, which is all the more challenging on Web and user-generated content with their diverse and continuously changing language. This paper aims to quantify how this diversity impacts state-of-the-art NER methods, by measuring named entity (NE) and context variability, feature sparsity, and their effects on precision and recall. In particular, our findings indicate that NER approaches struggle to generalise in diverse genres with limited training data. Unseen NEs, in particular, play an important role, which have a higher incidence in diverse genres ","authors_text":"Isabelle Augenstein, Kalina Bontcheva, Leon Derczynski","cross_cats":[],"headline":"","license":"http://creativecommons.org/licenses/by-sa/4.0/","primary_cat":"cs.CL","submitted_at":"2017-01-11T08:02:40Z","title":"Generalisation in Named Entity Recognition: A Quantitative Analysis"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1701.02877","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:0c54727a830e96fe54ad9508eb5c65ca24a489d8cab9a69635e33217e37c3865","target":"record","created_at":"2026-05-18T00:49:06Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"d5b5ce3cc41c4e175af9ddaab904d44a3219995da5624c06c7d6401edba19e92","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by-sa/4.0/","primary_cat":"cs.CL","submitted_at":"2017-01-11T08:02:40Z","title_canon_sha256":"0e888680daf587aae36157758efe09e7580f71819ac0061de68cef23eb79fca5"},"schema_version":"1.0","source":{"id":"1701.02877","kind":"arxiv","version":2}},"canonical_sha256":"749e6e89ab5df72f4d387f7f77442adc056254b0c12352e020d0c2ae2b6b4812","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"749e6e89ab5df72f4d387f7f77442adc056254b0c12352e020d0c2ae2b6b4812","first_computed_at":"2026-05-18T00:49:06.324890Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:49:06.324890Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"H/zGUBP4g6QQ77rUhBFwjZGdOL578yeBETs8/4m02+saN3NRgML8lXYMu1mNu20wuVkx8ItBS5jIzieWyjvgAQ==","signature_status":"signed_v1","signed_at":"2026-05-18T00:49:06.325295Z","signed_message":"canonical_sha256_bytes"},"source_id":"1701.02877","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:0c54727a830e96fe54ad9508eb5c65ca24a489d8cab9a69635e33217e37c3865","sha256:82af2ebfa8640f0606442f2b44c214ca95a01bad0086a96552f706a8ea7694c7"],"state_sha256":"56402fc0ba94c8a90bf0eb6d1d5bd9cfe888431399a0c528d6107339b29e80bb"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"09q2WkBpdR72iZ16JcVhms6i9OC/nzuvmBSh2L/70296RowoV6TbTq2cYnbmfYMZRg0j8j8F9XPnjhIzoT+oBw==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-27T20:21:49.159007Z","bundle_sha256":"e115809d3a74a2131a8bf950802d76610b31b851b6e29d75a3e390d1154cb99c"}}