{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:YL2LFA3JTFL7ZY3I54A2L72CFP","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"2ac48fcefc85196017c93e08d6c23f3d5f20c0bd2e24afc3efa5131a2b273d9c","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by-sa/4.0/","primary_cat":"cs.CL","submitted_at":"2026-03-22T11:32:31Z","title_canon_sha256":"fbb00213ceeeacade36f54522cfe61669b19980dc471da101dd7a7712c4b328a"},"schema_version":"1.0","source":{"id":"2603.21174","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2603.21174","created_at":"2026-05-17T23:39:04Z"},{"alias_kind":"arxiv_version","alias_value":"2603.21174v1","created_at":"2026-05-17T23:39:04Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2603.21174","created_at":"2026-05-17T23:39:04Z"},{"alias_kind":"pith_short_12","alias_value":"YL2LFA3JTFL7","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_16","alias_value":"YL2LFA3JTFL7ZY3I","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_8","alias_value":"YL2LFA3J","created_at":"2026-05-18T12:33:37Z"}],"graph_snapshots":[{"event_id":"sha256:21c045bf405ab2a0b92cd4b9d428e998ce4ceea590c97ea3502fc267658791f4","target":"graph","created_at":"2026-05-17T23:39:04Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"DSD can lead to increased performance in the specific task of paraphrase detection."},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"The semi-automated pipeline combining LLMs with human verification produces reliable and accurate labels for the Span Similarity Dataset."},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"Introduces the Dissimilar Span Detection task and Span Similarity Dataset to explain semantic textual similarity by identifying differing spans between text pairs."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"Detecting dissimilar spans between text pairs explains semantic similarity scores and boosts paraphrase detection performance."}],"snapshot_sha256":"0f74b85f1bbfe06cd215b6fc7a831e0f01b689d0eaf4b831f73d54ba2bb20bd2"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Semantic Textual Similarity (STS) is a crucial component of many Natural Language Processing (NLP) applications. However, existing approaches typically reduce semantic nuances to a single score, limiting interpretability. To address this, we introduce the task of Dissimilar Span Detection (DSD), which aims to identify semantically differing spans between pairs of texts. This can help users understand which particular words or tokens negatively affect the similarity score, or be used to improve performance in STS-dependent downstream tasks. Furthermore, we release a new dataset suitable for the","authors_text":"Alexander Fraser, Daryna Dementieva, Diego Miguel Lozano","cross_cats":[],"headline":"Detecting dissimilar spans between text pairs explains semantic similarity scores and boosts paraphrase detection performance.","license":"http://creativecommons.org/licenses/by-sa/4.0/","primary_cat":"cs.CL","submitted_at":"2026-03-22T11:32:31Z","title":"Explainable Semantic Textual Similarity via Dissimilar Span Detection"},"references":{"count":18,"internal_anchors":3,"resolved_work":18,"sample":[{"cited_arxiv_id":"2603.21174","doi":"","is_internal_anchor":true,"ref_index":1,"title":"Explainable Semantic Textual Similarity via Dissimilar Span Detection","work_id":"c54932f5-6578-46a1-b963-fd0cb2931c3d","year":2022},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":2,"title":"the alignment between pairs of segments 1 across the two sen- tences, where the relation between the segments is labeled with a relation type and a similarity score","work_id":"cf191cbe-6381-419e-926f-102ff7ac4f53","year":2018},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":3,"title":"a woman” and “a man","work_id":"18d84470-3ac9-456c-87aa-7b8bf768d633","year":2016},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":4,"title":"The modified spans could ei- ther be equivalent in meaning to the original one, or be semantically dissimilar","work_id":"7d07ccf3-fb5b-4d9f-87eb-2b5ee5386b47","year":null},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":5,"title":"In our case, {{ de- notes the beginning of a span, and }} its end","work_id":"415e34b3-7408-4029-b3e8-b6eec1174ab9","year":null}],"snapshot_sha256":"1455f7d7039ad3d8507f988c66aa2e0ac986461c7bb00acd24a78f6511872df5"},"source":{"id":"2603.21174","kind":"arxiv","version":1},"verdict":{"created_at":"2026-05-15T07:16:42.546092Z","id":"b7c1093c-edf1-4109-9d3c-b04f7b406919","model_set":{"reader":"grok-4.3"},"one_line_summary":"Introduces the Dissimilar Span Detection task and Span Similarity Dataset to explain semantic textual similarity by identifying differing spans between text pairs.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"Detecting dissimilar spans between text pairs explains semantic similarity scores and boosts paraphrase detection performance.","strongest_claim":"DSD can lead to increased performance in the specific task of paraphrase detection.","weakest_assumption":"The semi-automated pipeline combining LLMs with human verification produces reliable and accurate labels for the Span Similarity Dataset."}},"verdict_id":"b7c1093c-edf1-4109-9d3c-b04f7b406919"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:816c8c5c3813e9f5df05f430ec6665d2af9cf6b8488f9e9d7e012c4640be4259","target":"record","created_at":"2026-05-17T23:39:04Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"2ac48fcefc85196017c93e08d6c23f3d5f20c0bd2e24afc3efa5131a2b273d9c","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by-sa/4.0/","primary_cat":"cs.CL","submitted_at":"2026-03-22T11:32:31Z","title_canon_sha256":"fbb00213ceeeacade36f54522cfe61669b19980dc471da101dd7a7712c4b328a"},"schema_version":"1.0","source":{"id":"2603.21174","kind":"arxiv","version":1}},"canonical_sha256":"c2f4b283699957fce368ef01a5ff422bff668c55ad776dbee3a10343b3cd9cab","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"c2f4b283699957fce368ef01a5ff422bff668c55ad776dbee3a10343b3cd9cab","first_computed_at":"2026-05-17T23:39:04.366399Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:39:04.366399Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"9lFbFtvmPZKgTnrwXhamZnLrwRPmOUHg5S7BSzsiJu0lcB/CrOY5d0jEdLFgMOX2sHVxHwRsGKFydb9iQ1vqDg==","signature_status":"signed_v1","signed_at":"2026-05-17T23:39:04.367244Z","signed_message":"canonical_sha256_bytes"},"source_id":"2603.21174","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:816c8c5c3813e9f5df05f430ec6665d2af9cf6b8488f9e9d7e012c4640be4259","sha256:21c045bf405ab2a0b92cd4b9d428e998ce4ceea590c97ea3502fc267658791f4"],"state_sha256":"1bc3b917cc3d5296c2d195f70698cc58519c061cfca7f8a4074c45f562b3b945"}