{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:IZ5WUD723UJZYHVNKZH5KMMKE3","short_pith_number":"pith:IZ5WUD72","canonical_record":{"source":{"id":"2604.13232","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-04-14T19:01:25Z","cross_cats_sorted":[],"title_canon_sha256":"cf7530a3a7ba7f8e577050b2b5d5d6f841ff0018cbed7df1657c0aed2bfc7ce9","abstract_canon_sha256":"818417c178bc70c93a0c9c434c5071f97abd289d8b567722eaeafb8af2ab8db8"},"schema_version":"1.0"},"canonical_sha256":"467b6a0ffadd139c1ead564fd5318a26e07c2f3dbe36350eadc5ff5c4cf7e178","source":{"kind":"arxiv","id":"2604.13232","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2604.13232","created_at":"2026-05-28T01:04:40Z"},{"alias_kind":"arxiv_version","alias_value":"2604.13232v2","created_at":"2026-05-28T01:04:40Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2604.13232","created_at":"2026-05-28T01:04:40Z"},{"alias_kind":"pith_short_12","alias_value":"IZ5WUD723UJZ","created_at":"2026-05-28T01:04:40Z"},{"alias_kind":"pith_short_16","alias_value":"IZ5WUD723UJZYHVN","created_at":"2026-05-28T01:04:40Z"},{"alias_kind":"pith_short_8","alias_value":"IZ5WUD72","created_at":"2026-05-28T01:04:40Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:IZ5WUD723UJZYHVNKZH5KMMKE3","target":"record","payload":{"canonical_record":{"source":{"id":"2604.13232","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-04-14T19:01:25Z","cross_cats_sorted":[],"title_canon_sha256":"cf7530a3a7ba7f8e577050b2b5d5d6f841ff0018cbed7df1657c0aed2bfc7ce9","abstract_canon_sha256":"818417c178bc70c93a0c9c434c5071f97abd289d8b567722eaeafb8af2ab8db8"},"schema_version":"1.0"},"canonical_sha256":"467b6a0ffadd139c1ead564fd5318a26e07c2f3dbe36350eadc5ff5c4cf7e178","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-28T01:04:40.066799Z","signature_b64":"iMboyMFPXS7j/lRMNId7ZO8FsJpGg9vRaWeNKAT1m9tkJX53QsnJ/d3HZVXuqlzhCDVn9IqAcUept/pZrGVvBQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"467b6a0ffadd139c1ead564fd5318a26e07c2f3dbe36350eadc5ff5c4cf7e178","last_reissued_at":"2026-05-28T01:04:40.066161Z","signature_status":"signed_v1","first_computed_at":"2026-05-28T01:04:40.066161Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2604.13232","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-28T01:04:40Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"YoxvGVn86PNKVkprXzlPlmx45b/MnNN66yI58Pq7Y0sZwBPQmbfUPl6JGs0SKrTsIYZhR3qi7JksKGouIC/HDg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-28T11:15:04.569399Z"},"content_sha256":"434f17069a1ee8a6a71b8d7bed1d1d0ba8db19dceb97be7a623e78080963170b","schema_version":"1.0","event_id":"sha256:434f17069a1ee8a6a71b8d7bed1d1d0ba8db19dceb97be7a623e78080963170b"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:IZ5WUD723UJZYHVNKZH5KMMKE3","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Evaluating the Evaluator: Problems with SemEval-2020 Task 1 for Lexical Semantic Change Detection","license":"http://creativecommons.org/licenses/by/4.0/","headline":"SemEval-2020 Task 1 for lexical semantic change detection has narrow definitions of change, corpus preprocessing errors, and limited target sets that make it a partial rather than definitive benchmark.","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Bach Phan-Tat, Dirk Geeraerts, Dirk Speelmana, Kris Heylen, Stefano De Pascale","submitted_at":"2026-04-14T19:01:25Z","abstract_excerpt":"This discussion paper re-examines SemEval-2020 Task 1, the most influential shared benchmark for lexical semantic change detection, through a three-part evaluative framework: operationalisation, data quality, and benchmark design. First, at the level of operationalisation, we argue that the benchmark models semantic change mainly as gain, loss, or redistribution of discrete senses. While practical for annotation and evaluation, this framing is too narrow to capture gradual, constructional, collocational, and discourse-level change. Also, the gold labels are outcomes of annotation decisions, cl"},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"Taken together, these limitations suggest that the benchmark should be treated as a useful but partial test bed rather than a definitive measure of progress.","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"That the listed corpus and preprocessing problems (OCR noise, malformed characters, truncated sentences, inconsistent lemmatisation, POS-tagging errors, missed targets) substantially distort model behaviour, complicate linguistic analysis, and reduce reproducibility.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"The SemEval-2020 Task 1 benchmark for lexical semantic change detection is limited by a narrow sense-based definition of change, substantial corpus and preprocessing errors, and small curated target sets that reduce realism.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"SemEval-2020 Task 1 for lexical semantic change detection has narrow definitions of change, corpus preprocessing errors, and limited target sets that make it a partial rather than definitive benchmark.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"05a41b603737bf70fa428ac4db321ee02932115d39626f359a5f14a7b7d82d09"},"source":{"id":"2604.13232","kind":"arxiv","version":2},"verdict":{"id":"c197ea2b-d712-4fb6-b579-7088baca8e75","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-10T15:31:06.988162Z","strongest_claim":"Taken together, these limitations suggest that the benchmark should be treated as a useful but partial test bed rather than a definitive measure of progress.","one_line_summary":"The SemEval-2020 Task 1 benchmark for lexical semantic change detection is limited by a narrow sense-based definition of change, substantial corpus and preprocessing errors, and small curated target sets that reduce realism.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"That the listed corpus and preprocessing problems (OCR noise, malformed characters, truncated sentences, inconsistent lemmatisation, POS-tagging errors, missed targets) substantially distort model behaviour, complicate linguistic analysis, and reduce reproducibility.","pith_extraction_headline":"SemEval-2020 Task 1 for lexical semantic change detection has narrow definitions of change, corpus preprocessing errors, and limited target sets that make it a partial rather than definitive benchmark."},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2604.13232/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":"c197ea2b-d712-4fb6-b579-7088baca8e75"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-28T01:04:40Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"6AjepWCEZH4Vn4odDp2sCNQ2RDDVSNNb7njhUnedscHDy4fFiLc1WC84eGPlh/dmvqqwBs5yjG7OmWg040MiAg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-28T11:15:04.569953Z"},"content_sha256":"30b1fe7d5dc911ba0ee328e332fb20f3b5e50d15ee9046ad97a28c5728937deb","schema_version":"1.0","event_id":"sha256:30b1fe7d5dc911ba0ee328e332fb20f3b5e50d15ee9046ad97a28c5728937deb"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/IZ5WUD723UJZYHVNKZH5KMMKE3/bundle.json","state_url":"https://pith.science/pith/IZ5WUD723UJZYHVNKZH5KMMKE3/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/IZ5WUD723UJZYHVNKZH5KMMKE3/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-28T11:15:04Z","links":{"resolver":"https://pith.science/pith/IZ5WUD723UJZYHVNKZH5KMMKE3","bundle":"https://pith.science/pith/IZ5WUD723UJZYHVNKZH5KMMKE3/bundle.json","state":"https://pith.science/pith/IZ5WUD723UJZYHVNKZH5KMMKE3/state.json","well_known_bundle":"https://pith.science/.well-known/pith/IZ5WUD723UJZYHVNKZH5KMMKE3/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:IZ5WUD723UJZYHVNKZH5KMMKE3","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"818417c178bc70c93a0c9c434c5071f97abd289d8b567722eaeafb8af2ab8db8","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-04-14T19:01:25Z","title_canon_sha256":"cf7530a3a7ba7f8e577050b2b5d5d6f841ff0018cbed7df1657c0aed2bfc7ce9"},"schema_version":"1.0","source":{"id":"2604.13232","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2604.13232","created_at":"2026-05-28T01:04:40Z"},{"alias_kind":"arxiv_version","alias_value":"2604.13232v2","created_at":"2026-05-28T01:04:40Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2604.13232","created_at":"2026-05-28T01:04:40Z"},{"alias_kind":"pith_short_12","alias_value":"IZ5WUD723UJZ","created_at":"2026-05-28T01:04:40Z"},{"alias_kind":"pith_short_16","alias_value":"IZ5WUD723UJZYHVN","created_at":"2026-05-28T01:04:40Z"},{"alias_kind":"pith_short_8","alias_value":"IZ5WUD72","created_at":"2026-05-28T01:04:40Z"}],"graph_snapshots":[{"event_id":"sha256:30b1fe7d5dc911ba0ee328e332fb20f3b5e50d15ee9046ad97a28c5728937deb","target":"graph","created_at":"2026-05-28T01:04:40Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"Taken together, these limitations suggest that the benchmark should be treated as a useful but partial test bed rather than a definitive measure of progress."},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"That the listed corpus and preprocessing problems (OCR noise, malformed characters, truncated sentences, inconsistent lemmatisation, POS-tagging errors, missed targets) substantially distort model behaviour, complicate linguistic analysis, and reduce reproducibility."},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"The SemEval-2020 Task 1 benchmark for lexical semantic change detection is limited by a narrow sense-based definition of change, substantial corpus and preprocessing errors, and small curated target sets that reduce realism."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"SemEval-2020 Task 1 for lexical semantic change detection has narrow definitions of change, corpus preprocessing errors, and limited target sets that make it a partial rather than definitive benchmark."}],"snapshot_sha256":"05a41b603737bf70fa428ac4db321ee02932115d39626f359a5f14a7b7d82d09"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2604.13232/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"This discussion paper re-examines SemEval-2020 Task 1, the most influential shared benchmark for lexical semantic change detection, through a three-part evaluative framework: operationalisation, data quality, and benchmark design. First, at the level of operationalisation, we argue that the benchmark models semantic change mainly as gain, loss, or redistribution of discrete senses. While practical for annotation and evaluation, this framing is too narrow to capture gradual, constructional, collocational, and discourse-level change. Also, the gold labels are outcomes of annotation decisions, cl","authors_text":"Bach Phan-Tat, Dirk Geeraerts, Dirk Speelmana, Kris Heylen, Stefano De Pascale","cross_cats":[],"headline":"SemEval-2020 Task 1 for lexical semantic change detection has narrow definitions of change, corpus preprocessing errors, and limited target sets that make it a partial rather than definitive benchmark.","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-04-14T19:01:25Z","title":"Evaluating the Evaluator: Problems with SemEval-2020 Task 1 for Lexical Semantic Change Detection"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2604.13232","kind":"arxiv","version":2},"verdict":{"created_at":"2026-05-10T15:31:06.988162Z","id":"c197ea2b-d712-4fb6-b579-7088baca8e75","model_set":{"reader":"grok-4.3"},"one_line_summary":"The SemEval-2020 Task 1 benchmark for lexical semantic change detection is limited by a narrow sense-based definition of change, substantial corpus and preprocessing errors, and small curated target sets that reduce realism.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"SemEval-2020 Task 1 for lexical semantic change detection has narrow definitions of change, corpus preprocessing errors, and limited target sets that make it a partial rather than definitive benchmark.","strongest_claim":"Taken together, these limitations suggest that the benchmark should be treated as a useful but partial test bed rather than a definitive measure of progress.","weakest_assumption":"That the listed corpus and preprocessing problems (OCR noise, malformed characters, truncated sentences, inconsistent lemmatisation, POS-tagging errors, missed targets) substantially distort model behaviour, complicate linguistic analysis, and reduce reproducibility."}},"verdict_id":"c197ea2b-d712-4fb6-b579-7088baca8e75"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:434f17069a1ee8a6a71b8d7bed1d1d0ba8db19dceb97be7a623e78080963170b","target":"record","created_at":"2026-05-28T01:04:40Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"818417c178bc70c93a0c9c434c5071f97abd289d8b567722eaeafb8af2ab8db8","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-04-14T19:01:25Z","title_canon_sha256":"cf7530a3a7ba7f8e577050b2b5d5d6f841ff0018cbed7df1657c0aed2bfc7ce9"},"schema_version":"1.0","source":{"id":"2604.13232","kind":"arxiv","version":2}},"canonical_sha256":"467b6a0ffadd139c1ead564fd5318a26e07c2f3dbe36350eadc5ff5c4cf7e178","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"467b6a0ffadd139c1ead564fd5318a26e07c2f3dbe36350eadc5ff5c4cf7e178","first_computed_at":"2026-05-28T01:04:40.066161Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-28T01:04:40.066161Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"iMboyMFPXS7j/lRMNId7ZO8FsJpGg9vRaWeNKAT1m9tkJX53QsnJ/d3HZVXuqlzhCDVn9IqAcUept/pZrGVvBQ==","signature_status":"signed_v1","signed_at":"2026-05-28T01:04:40.066799Z","signed_message":"canonical_sha256_bytes"},"source_id":"2604.13232","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:434f17069a1ee8a6a71b8d7bed1d1d0ba8db19dceb97be7a623e78080963170b","sha256:30b1fe7d5dc911ba0ee328e332fb20f3b5e50d15ee9046ad97a28c5728937deb"],"state_sha256":"49cf5faa73fc6e37240a571490608726a3f7362d8cf93df6feb35e099f4712d4"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"ZCehSlMEa8Rd5FEh+K7j072E9QBfVLNSvPk8XHittJ96t2mXm/e3FvqChM9VfL6HLFrlW/JP2ROGGHjSUdRDAA==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-28T11:15:04.572271Z","bundle_sha256":"ee01e80295a66fc2b32352dfef69be5785c7c2f880cc4d34ac0700df005de391"}}