{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:VUPYIABJ5G4KIF3EPAQR2LLB3G","short_pith_number":"pith:VUPYIABJ","schema_version":"1.0","canonical_sha256":"ad1f840029e9b8a4176478211d2d61d9ab7eab502ce73c1a09c3d9caacec9e97","source":{"kind":"arxiv","id":"2605.27345","version":1},"attestation_state":"computed","paper":{"title":"MATCHA: Matching Text via Contrastive Semantic Alignment","license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Carsten Eickhoff, Ece Sena Etoglu, Seyed Ali Bahrainian, Siran Li","submitted_at":"2026-05-26T17:47:14Z","abstract_excerpt":"Reliable evaluation is essential for understanding large language model (LLM) performance, yet today's go-to metrics, namely token-overlap scores (e.g., ROUGE) and embedding-based measures (e.g., BERTScore), often misjudge semantic similarity of documents. Our study shows that both token-overlap metrics and embedding-based metrics routinely assign nearly identical scores to texts that directly contradict each other, thereby potentially masking fundamental errors. We introduce MATCHA, an automatic metric that jointly rewards semantic agreement with a reference and penalizes contradictions. MATC"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2605.27345","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","primary_cat":"cs.CL","submitted_at":"2026-05-26T17:47:14Z","cross_cats_sorted":[],"title_canon_sha256":"929417e6842cc3f218205a362b695386c6a278f2f6060a2d63f63bfe5bfdf524","abstract_canon_sha256":"ae0a5272f863ec00a0916dff34b121a5f9ac9651c5a3443aafbdf9c41fae95d6"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-27T02:06:19.013213Z","signature_b64":"lm+AqgmGL4ixSabapZGgrE7R2o9Elo+b6AfABUMRNO724ItC2S0+UhCe7nDAOEGXcL8hILy4jeA2ZmDUoMsuDA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"ad1f840029e9b8a4176478211d2d61d9ab7eab502ce73c1a09c3d9caacec9e97","last_reissued_at":"2026-05-27T02:06:19.012511Z","signature_status":"signed_v1","first_computed_at":"2026-05-27T02:06:19.012511Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"MATCHA: Matching Text via Contrastive Semantic Alignment","license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Carsten Eickhoff, Ece Sena Etoglu, Seyed Ali Bahrainian, Siran Li","submitted_at":"2026-05-26T17:47:14Z","abstract_excerpt":"Reliable evaluation is essential for understanding large language model (LLM) performance, yet today's go-to metrics, namely token-overlap scores (e.g., ROUGE) and embedding-based measures (e.g., BERTScore), often misjudge semantic similarity of documents. Our study shows that both token-overlap metrics and embedding-based metrics routinely assign nearly identical scores to texts that directly contradict each other, thereby potentially masking fundamental errors. We introduce MATCHA, an automatic metric that jointly rewards semantic agreement with a reference and penalizes contradictions. MATC"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.27345","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2605.27345/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2605.27345","created_at":"2026-05-27T02:06:19.012623+00:00"},{"alias_kind":"arxiv_version","alias_value":"2605.27345v1","created_at":"2026-05-27T02:06:19.012623+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.27345","created_at":"2026-05-27T02:06:19.012623+00:00"},{"alias_kind":"pith_short_12","alias_value":"VUPYIABJ5G4K","created_at":"2026-05-27T02:06:19.012623+00:00"},{"alias_kind":"pith_short_16","alias_value":"VUPYIABJ5G4KIF3E","created_at":"2026-05-27T02:06:19.012623+00:00"},{"alias_kind":"pith_short_8","alias_value":"VUPYIABJ","created_at":"2026-05-27T02:06:19.012623+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/VUPYIABJ5G4KIF3EPAQR2LLB3G","json":"https://pith.science/pith/VUPYIABJ5G4KIF3EPAQR2LLB3G.json","graph_json":"https://pith.science/api/pith-number/VUPYIABJ5G4KIF3EPAQR2LLB3G/graph.json","events_json":"https://pith.science/api/pith-number/VUPYIABJ5G4KIF3EPAQR2LLB3G/events.json","paper":"https://pith.science/paper/VUPYIABJ"},"agent_actions":{"view_html":"https://pith.science/pith/VUPYIABJ5G4KIF3EPAQR2LLB3G","download_json":"https://pith.science/pith/VUPYIABJ5G4KIF3EPAQR2LLB3G.json","view_paper":"https://pith.science/paper/VUPYIABJ","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2605.27345&json=true","fetch_graph":"https://pith.science/api/pith-number/VUPYIABJ5G4KIF3EPAQR2LLB3G/graph.json","fetch_events":"https://pith.science/api/pith-number/VUPYIABJ5G4KIF3EPAQR2LLB3G/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/VUPYIABJ5G4KIF3EPAQR2LLB3G/action/timestamp_anchor","attest_storage":"https://pith.science/pith/VUPYIABJ5G4KIF3EPAQR2LLB3G/action/storage_attestation","attest_author":"https://pith.science/pith/VUPYIABJ5G4KIF3EPAQR2LLB3G/action/author_attestation","sign_citation":"https://pith.science/pith/VUPYIABJ5G4KIF3EPAQR2LLB3G/action/citation_signature","submit_replication":"https://pith.science/pith/VUPYIABJ5G4KIF3EPAQR2LLB3G/action/replication_record"}},"created_at":"2026-05-27T02:06:19.012623+00:00","updated_at":"2026-05-27T02:06:19.012623+00:00"}