{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2019:L2XDFOY6BWEFJ6AQOMQU7G5Y57","short_pith_number":"pith:L2XDFOY6","canonical_record":{"source":{"id":"1905.07791","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2019-05-19T18:28:34Z","cross_cats_sorted":[],"title_canon_sha256":"9ffbae7d07d331fabd33a01d678dd7c2797d12d448065fc23ee8558a5846b42a","abstract_canon_sha256":"7c7dc89f4e1a22bb2ec5c2ffb84dbf68899db3a0ca6e2b7bf7ace09f3a25d2cc"},"schema_version":"1.0"},"canonical_sha256":"5eae32bb1e0d8854f81073214f9bb8eff8328d9c5d6c3fdc9c4eeda641554d7e","source":{"kind":"arxiv","id":"1905.07791","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1905.07791","created_at":"2026-05-17T23:45:48Z"},{"alias_kind":"arxiv_version","alias_value":"1905.07791v1","created_at":"2026-05-17T23:45:48Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1905.07791","created_at":"2026-05-17T23:45:48Z"},{"alias_kind":"pith_short_12","alias_value":"L2XDFOY6BWEF","created_at":"2026-05-18T12:33:21Z"},{"alias_kind":"pith_short_16","alias_value":"L2XDFOY6BWEFJ6AQ","created_at":"2026-05-18T12:33:21Z"},{"alias_kind":"pith_short_8","alias_value":"L2XDFOY6","created_at":"2026-05-18T12:33:21Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2019:L2XDFOY6BWEFJ6AQOMQU7G5Y57","target":"record","payload":{"canonical_record":{"source":{"id":"1905.07791","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2019-05-19T18:28:34Z","cross_cats_sorted":[],"title_canon_sha256":"9ffbae7d07d331fabd33a01d678dd7c2797d12d448065fc23ee8558a5846b42a","abstract_canon_sha256":"7c7dc89f4e1a22bb2ec5c2ffb84dbf68899db3a0ca6e2b7bf7ace09f3a25d2cc"},"schema_version":"1.0"},"canonical_sha256":"5eae32bb1e0d8854f81073214f9bb8eff8328d9c5d6c3fdc9c4eeda641554d7e","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:45:48.465335Z","signature_b64":"M7Jdblx9sqRvMb8LqeGTX5vT98tdbPGAkK35fRkoYCoaKQnkKkz93HDyjhXJpPvGBmQTyBqfYbKUGtdPzUmJCA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"5eae32bb1e0d8854f81073214f9bb8eff8328d9c5d6c3fdc9c4eeda641554d7e","last_reissued_at":"2026-05-17T23:45:48.464843Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:45:48.464843Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1905.07791","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:45:48Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"Fi/NHngLFFKoVfKu55n7gcHqWUqPIlBBNGVshCxxb1vLj7bvir3pwTeRwJHe625XHYHXDPOBec465vsfoHGGBw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-06T15:47:43.174411Z"},"content_sha256":"dced74910713a13b3c86a499cafba05fcef2854fdbd6ea518b8431b5e65db06d","schema_version":"1.0","event_id":"sha256:dced74910713a13b3c86a499cafba05fcef2854fdbd6ea518b8431b5e65db06d"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2019:L2XDFOY6BWEFJ6AQOMQU7G5Y57","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Predicting Annotation Difficulty to Improve Task Routing and Model Performance for Biomedical Information Extraction","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Ani Nenkova, Byron C. Wallace, Chris Tar, Oshin Agarwal, Yinfei Yang","submitted_at":"2019-05-19T18:28:34Z","abstract_excerpt":"Modern NLP systems require high-quality annotated data. In specialized domains, expert annotations may be prohibitively expensive. An alternative is to rely on crowdsourcing to reduce costs at the risk of introducing noise. In this paper we demonstrate that directly modeling instance difficulty can be used to improve model performance, and to route instances to appropriate annotators. Our difficulty prediction model combines two learned representations: a `universal' encoder trained on out-of-domain data, and a task-specific encoder. Experiments on a complex biomedical information extraction t"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1905.07791","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:45:48Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"k3oP14QXvJamYM/fvUdpIdcHwLdZrbMw7Bi7D/4pkTrzBe26jRLvP8+MhCxeofpHsR2oqIfqiwVIdYBn3sscBQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-06T15:47:43.175022Z"},"content_sha256":"318082adea7b2aa745f79ef2cba30144f82a7005a1634ba266363020be7ba24d","schema_version":"1.0","event_id":"sha256:318082adea7b2aa745f79ef2cba30144f82a7005a1634ba266363020be7ba24d"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/L2XDFOY6BWEFJ6AQOMQU7G5Y57/bundle.json","state_url":"https://pith.science/pith/L2XDFOY6BWEFJ6AQOMQU7G5Y57/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/L2XDFOY6BWEFJ6AQOMQU7G5Y57/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-06T15:47:43Z","links":{"resolver":"https://pith.science/pith/L2XDFOY6BWEFJ6AQOMQU7G5Y57","bundle":"https://pith.science/pith/L2XDFOY6BWEFJ6AQOMQU7G5Y57/bundle.json","state":"https://pith.science/pith/L2XDFOY6BWEFJ6AQOMQU7G5Y57/state.json","well_known_bundle":"https://pith.science/.well-known/pith/L2XDFOY6BWEFJ6AQOMQU7G5Y57/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2019:L2XDFOY6BWEFJ6AQOMQU7G5Y57","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"7c7dc89f4e1a22bb2ec5c2ffb84dbf68899db3a0ca6e2b7bf7ace09f3a25d2cc","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2019-05-19T18:28:34Z","title_canon_sha256":"9ffbae7d07d331fabd33a01d678dd7c2797d12d448065fc23ee8558a5846b42a"},"schema_version":"1.0","source":{"id":"1905.07791","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1905.07791","created_at":"2026-05-17T23:45:48Z"},{"alias_kind":"arxiv_version","alias_value":"1905.07791v1","created_at":"2026-05-17T23:45:48Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1905.07791","created_at":"2026-05-17T23:45:48Z"},{"alias_kind":"pith_short_12","alias_value":"L2XDFOY6BWEF","created_at":"2026-05-18T12:33:21Z"},{"alias_kind":"pith_short_16","alias_value":"L2XDFOY6BWEFJ6AQ","created_at":"2026-05-18T12:33:21Z"},{"alias_kind":"pith_short_8","alias_value":"L2XDFOY6","created_at":"2026-05-18T12:33:21Z"}],"graph_snapshots":[{"event_id":"sha256:318082adea7b2aa745f79ef2cba30144f82a7005a1634ba266363020be7ba24d","target":"graph","created_at":"2026-05-17T23:45:48Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Modern NLP systems require high-quality annotated data. In specialized domains, expert annotations may be prohibitively expensive. An alternative is to rely on crowdsourcing to reduce costs at the risk of introducing noise. In this paper we demonstrate that directly modeling instance difficulty can be used to improve model performance, and to route instances to appropriate annotators. Our difficulty prediction model combines two learned representations: a `universal' encoder trained on out-of-domain data, and a task-specific encoder. Experiments on a complex biomedical information extraction t","authors_text":"Ani Nenkova, Byron C. Wallace, Chris Tar, Oshin Agarwal, Yinfei Yang","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2019-05-19T18:28:34Z","title":"Predicting Annotation Difficulty to Improve Task Routing and Model Performance for Biomedical Information Extraction"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1905.07791","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:dced74910713a13b3c86a499cafba05fcef2854fdbd6ea518b8431b5e65db06d","target":"record","created_at":"2026-05-17T23:45:48Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"7c7dc89f4e1a22bb2ec5c2ffb84dbf68899db3a0ca6e2b7bf7ace09f3a25d2cc","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2019-05-19T18:28:34Z","title_canon_sha256":"9ffbae7d07d331fabd33a01d678dd7c2797d12d448065fc23ee8558a5846b42a"},"schema_version":"1.0","source":{"id":"1905.07791","kind":"arxiv","version":1}},"canonical_sha256":"5eae32bb1e0d8854f81073214f9bb8eff8328d9c5d6c3fdc9c4eeda641554d7e","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"5eae32bb1e0d8854f81073214f9bb8eff8328d9c5d6c3fdc9c4eeda641554d7e","first_computed_at":"2026-05-17T23:45:48.464843Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:45:48.464843Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"M7Jdblx9sqRvMb8LqeGTX5vT98tdbPGAkK35fRkoYCoaKQnkKkz93HDyjhXJpPvGBmQTyBqfYbKUGtdPzUmJCA==","signature_status":"signed_v1","signed_at":"2026-05-17T23:45:48.465335Z","signed_message":"canonical_sha256_bytes"},"source_id":"1905.07791","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:dced74910713a13b3c86a499cafba05fcef2854fdbd6ea518b8431b5e65db06d","sha256:318082adea7b2aa745f79ef2cba30144f82a7005a1634ba266363020be7ba24d"],"state_sha256":"08824954a5c51c52eeaf79b61f95819c6bfb2c791c56587dabbb14f1fbe2cba5"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"1zY+yM09GkC6J5PoPUy2lniwWhqXJ+FWUcBVPGEsnF8TlxicVJHiMY+vP0kkNDxleHRQMdBbJSXiXAr/Lu0RBA==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-06T15:47:43.178566Z","bundle_sha256":"8e2549fa4ab3fa0d72edd852d64e37a1b5cf1b21fce47bae1c1d8681c4909d87"}}