{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2015:5WU3FNINPDZTTCAWGGYUPQ4K7Q","short_pith_number":"pith:5WU3FNIN","canonical_record":{"source":{"id":"1504.05319","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by-nc-sa/3.0/","primary_cat":"cs.CL","submitted_at":"2015-04-21T06:58:26Z","cross_cats_sorted":[],"title_canon_sha256":"636f946e528c92f4413c520f157c0d3d87a7784c4a9c6780756f9c4974e52ce9","abstract_canon_sha256":"67c0ad985857afe8443562d9ba3214bb1ad08fb779a597ed319f919d76d0e599"},"schema_version":"1.0"},"canonical_sha256":"eda9b2b50d78f339881631b147c38afc0fbe82097e37741acb1bf8a5f47c6029","source":{"kind":"arxiv","id":"1504.05319","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1504.05319","created_at":"2026-05-18T02:03:59Z"},{"alias_kind":"arxiv_version","alias_value":"1504.05319v2","created_at":"2026-05-18T02:03:59Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1504.05319","created_at":"2026-05-18T02:03:59Z"},{"alias_kind":"pith_short_12","alias_value":"5WU3FNINPDZT","created_at":"2026-05-18T12:29:07Z"},{"alias_kind":"pith_short_16","alias_value":"5WU3FNINPDZTTCAW","created_at":"2026-05-18T12:29:07Z"},{"alias_kind":"pith_short_8","alias_value":"5WU3FNIN","created_at":"2026-05-18T12:29:07Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2015:5WU3FNINPDZTTCAWGGYUPQ4K7Q","target":"record","payload":{"canonical_record":{"source":{"id":"1504.05319","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by-nc-sa/3.0/","primary_cat":"cs.CL","submitted_at":"2015-04-21T06:58:26Z","cross_cats_sorted":[],"title_canon_sha256":"636f946e528c92f4413c520f157c0d3d87a7784c4a9c6780756f9c4974e52ce9","abstract_canon_sha256":"67c0ad985857afe8443562d9ba3214bb1ad08fb779a597ed319f919d76d0e599"},"schema_version":"1.0"},"canonical_sha256":"eda9b2b50d78f339881631b147c38afc0fbe82097e37741acb1bf8a5f47c6029","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T02:03:59.566749Z","signature_b64":"CYwdfu7AGRWJMXvI/hOV2ZM98Q1sM9NyrBDXmBo6XvIjfX7mOzpztKX4WmIddpCmi/eQ1L1GK3QJYm3EWmZWAg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"eda9b2b50d78f339881631b147c38afc0fbe82097e37741acb1bf8a5f47c6029","last_reissued_at":"2026-05-18T02:03:59.565941Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T02:03:59.565941Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1504.05319","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T02:03:59Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"uCEPi68Y6nmebUNeAiKEeSTMpI2tuujjqw0/nCRXIk+a1LWdE/QEGw1G96BbuJkokes24lsyKSVcav+nH9EbDw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-27T09:56:09.463939Z"},"content_sha256":"428e8518ea3d10630559bf6b95ef7555120a97e61e462fb4bb664bffee05ea10","schema_version":"1.0","event_id":"sha256:428e8518ea3d10630559bf6b95ef7555120a97e61e462fb4bb664bffee05ea10"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2015:5WU3FNINPDZTTCAWGGYUPQ4K7Q","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Big Data Small Data, In Domain Out-of Domain, Known Word Unknown Word: The Impact of Word Representation on Sequence Labelling Tasks","license":"http://creativecommons.org/licenses/by-nc-sa/3.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Gabriela Ferraro, Liyuan Zhou, Lizhen Qu, Nathan Schneider, Timothy Baldwin, Weiwei Hou","submitted_at":"2015-04-21T06:58:26Z","abstract_excerpt":"Word embeddings -- distributed word representations that can be learned from unlabelled data -- have been shown to have high utility in many natural language processing applications. In this paper, we perform an extrinsic evaluation of five popular word embedding methods in the context of four sequence labelling tasks: POS-tagging, syntactic chunking, NER and MWE identification. A particular focus of the paper is analysing the effects of task-based updating of word representations. We show that when using word embeddings as features, as few as several hundred training instances are sufficient "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1504.05319","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T02:03:59Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"/Ls/Z9iyhwp9gZBGLsPq6XIAOaPlpvuQmzr+yyXDDSyCuZoN4p2RSCsgLuYOzLNkrR26vuLJI3ncZ0+o/8q1Aw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-27T09:56:09.464609Z"},"content_sha256":"cb669ffe913b4aab22fe6b94e0673c1504c56360118a9142084f7882b6312481","schema_version":"1.0","event_id":"sha256:cb669ffe913b4aab22fe6b94e0673c1504c56360118a9142084f7882b6312481"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/5WU3FNINPDZTTCAWGGYUPQ4K7Q/bundle.json","state_url":"https://pith.science/pith/5WU3FNINPDZTTCAWGGYUPQ4K7Q/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/5WU3FNINPDZTTCAWGGYUPQ4K7Q/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-27T09:56:09Z","links":{"resolver":"https://pith.science/pith/5WU3FNINPDZTTCAWGGYUPQ4K7Q","bundle":"https://pith.science/pith/5WU3FNINPDZTTCAWGGYUPQ4K7Q/bundle.json","state":"https://pith.science/pith/5WU3FNINPDZTTCAWGGYUPQ4K7Q/state.json","well_known_bundle":"https://pith.science/.well-known/pith/5WU3FNINPDZTTCAWGGYUPQ4K7Q/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2015:5WU3FNINPDZTTCAWGGYUPQ4K7Q","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"67c0ad985857afe8443562d9ba3214bb1ad08fb779a597ed319f919d76d0e599","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by-nc-sa/3.0/","primary_cat":"cs.CL","submitted_at":"2015-04-21T06:58:26Z","title_canon_sha256":"636f946e528c92f4413c520f157c0d3d87a7784c4a9c6780756f9c4974e52ce9"},"schema_version":"1.0","source":{"id":"1504.05319","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1504.05319","created_at":"2026-05-18T02:03:59Z"},{"alias_kind":"arxiv_version","alias_value":"1504.05319v2","created_at":"2026-05-18T02:03:59Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1504.05319","created_at":"2026-05-18T02:03:59Z"},{"alias_kind":"pith_short_12","alias_value":"5WU3FNINPDZT","created_at":"2026-05-18T12:29:07Z"},{"alias_kind":"pith_short_16","alias_value":"5WU3FNINPDZTTCAW","created_at":"2026-05-18T12:29:07Z"},{"alias_kind":"pith_short_8","alias_value":"5WU3FNIN","created_at":"2026-05-18T12:29:07Z"}],"graph_snapshots":[{"event_id":"sha256:cb669ffe913b4aab22fe6b94e0673c1504c56360118a9142084f7882b6312481","target":"graph","created_at":"2026-05-18T02:03:59Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Word embeddings -- distributed word representations that can be learned from unlabelled data -- have been shown to have high utility in many natural language processing applications. In this paper, we perform an extrinsic evaluation of five popular word embedding methods in the context of four sequence labelling tasks: POS-tagging, syntactic chunking, NER and MWE identification. A particular focus of the paper is analysing the effects of task-based updating of word representations. We show that when using word embeddings as features, as few as several hundred training instances are sufficient ","authors_text":"Gabriela Ferraro, Liyuan Zhou, Lizhen Qu, Nathan Schneider, Timothy Baldwin, Weiwei Hou","cross_cats":[],"headline":"","license":"http://creativecommons.org/licenses/by-nc-sa/3.0/","primary_cat":"cs.CL","submitted_at":"2015-04-21T06:58:26Z","title":"Big Data Small Data, In Domain Out-of Domain, Known Word Unknown Word: The Impact of Word Representation on Sequence Labelling Tasks"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1504.05319","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:428e8518ea3d10630559bf6b95ef7555120a97e61e462fb4bb664bffee05ea10","target":"record","created_at":"2026-05-18T02:03:59Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"67c0ad985857afe8443562d9ba3214bb1ad08fb779a597ed319f919d76d0e599","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by-nc-sa/3.0/","primary_cat":"cs.CL","submitted_at":"2015-04-21T06:58:26Z","title_canon_sha256":"636f946e528c92f4413c520f157c0d3d87a7784c4a9c6780756f9c4974e52ce9"},"schema_version":"1.0","source":{"id":"1504.05319","kind":"arxiv","version":2}},"canonical_sha256":"eda9b2b50d78f339881631b147c38afc0fbe82097e37741acb1bf8a5f47c6029","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"eda9b2b50d78f339881631b147c38afc0fbe82097e37741acb1bf8a5f47c6029","first_computed_at":"2026-05-18T02:03:59.565941Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T02:03:59.565941Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"CYwdfu7AGRWJMXvI/hOV2ZM98Q1sM9NyrBDXmBo6XvIjfX7mOzpztKX4WmIddpCmi/eQ1L1GK3QJYm3EWmZWAg==","signature_status":"signed_v1","signed_at":"2026-05-18T02:03:59.566749Z","signed_message":"canonical_sha256_bytes"},"source_id":"1504.05319","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:428e8518ea3d10630559bf6b95ef7555120a97e61e462fb4bb664bffee05ea10","sha256:cb669ffe913b4aab22fe6b94e0673c1504c56360118a9142084f7882b6312481"],"state_sha256":"2819996210bb478de6db6ad44dc2daf2f11f34730bb4bdcd64f63453d1dd93b3"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"72ds73TKf5zsKo9/fm3nxkOe8NFvoZZwfcm4ZVxhYUZIpJBCahqBFFqc+YWetqmqhDMb2NbL7iDUTE8WuVN7BA==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-27T09:56:09.467665Z","bundle_sha256":"96f8e725c237fcdf9d3c36212a16881b30705d7bc158c7ac95f5f28e51f1db14"}}