{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:V4YYYIN3Y4JGLOBJDTAUHAW5RK","short_pith_number":"pith:V4YYYIN3","schema_version":"1.0","canonical_sha256":"af318c21bbc71265b8291cc14382dd8aaf551b8a9d49fe6cdc7d6452de0a274e","source":{"kind":"arxiv","id":"2605.24556","version":1},"attestation_state":"computed","paper":{"title":"The Multilingual Curse at the Retrieval Layer: Evidence from Amharic","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.CL","cs.LG"],"primary_cat":"cs.IR","authors_text":"Kidist Amde Mekonnen, Maarten de Rijke, Yosef Worku Alemneh","submitted_at":"2026-05-23T12:44:30Z","abstract_excerpt":"Multilingual retrieval increasingly underpins cross-lingual question answering and retrieval-augmented generation. Strong zero-shot scores on multilingual benchmarks are often taken as evidence that current encoders transfer reliably across many languages. We argue that this assumption breaks down for underrepresented, morphologically rich languages, and use Amharic as a diagnostic case. Under a shared passage retrieval protocol covering dense, late-interaction, learned sparse, and cross-encoder paradigms, we compare zero-shot multilingual retrievers, Amharic-fine-tuned multilingual retrievers"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2605.24556","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.IR","submitted_at":"2026-05-23T12:44:30Z","cross_cats_sorted":["cs.CL","cs.LG"],"title_canon_sha256":"619fffaf6395ba870aa1a85cf1da8632722a0640e3414089ebe30bad82e2011c","abstract_canon_sha256":"5cf1d909d261ef952e2ce1e7675d8a1213fbc2b41a7eab7fc858ab1e2d3d446b"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-26T01:03:46.207005Z","signature_b64":"zVp8r6pB1u/mtAvxSpuClxfd+ZRxtmon+JDsHtL2TTAtwf7YwcW3VlIvSarw7urrLgme2FSJ3vLzMHZqqeAGDQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"af318c21bbc71265b8291cc14382dd8aaf551b8a9d49fe6cdc7d6452de0a274e","last_reissued_at":"2026-05-26T01:03:46.206018Z","signature_status":"signed_v1","first_computed_at":"2026-05-26T01:03:46.206018Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"The Multilingual Curse at the Retrieval Layer: Evidence from Amharic","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.CL","cs.LG"],"primary_cat":"cs.IR","authors_text":"Kidist Amde Mekonnen, Maarten de Rijke, Yosef Worku Alemneh","submitted_at":"2026-05-23T12:44:30Z","abstract_excerpt":"Multilingual retrieval increasingly underpins cross-lingual question answering and retrieval-augmented generation. Strong zero-shot scores on multilingual benchmarks are often taken as evidence that current encoders transfer reliably across many languages. We argue that this assumption breaks down for underrepresented, morphologically rich languages, and use Amharic as a diagnostic case. Under a shared passage retrieval protocol covering dense, late-interaction, learned sparse, and cross-encoder paradigms, we compare zero-shot multilingual retrievers, Amharic-fine-tuned multilingual retrievers"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.24556","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2605.24556/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2605.24556","created_at":"2026-05-26T01:03:46.206215+00:00"},{"alias_kind":"arxiv_version","alias_value":"2605.24556v1","created_at":"2026-05-26T01:03:46.206215+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.24556","created_at":"2026-05-26T01:03:46.206215+00:00"},{"alias_kind":"pith_short_12","alias_value":"V4YYYIN3Y4JG","created_at":"2026-05-26T01:03:46.206215+00:00"},{"alias_kind":"pith_short_16","alias_value":"V4YYYIN3Y4JGLOBJ","created_at":"2026-05-26T01:03:46.206215+00:00"},{"alias_kind":"pith_short_8","alias_value":"V4YYYIN3","created_at":"2026-05-26T01:03:46.206215+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/V4YYYIN3Y4JGLOBJDTAUHAW5RK","json":"https://pith.science/pith/V4YYYIN3Y4JGLOBJDTAUHAW5RK.json","graph_json":"https://pith.science/api/pith-number/V4YYYIN3Y4JGLOBJDTAUHAW5RK/graph.json","events_json":"https://pith.science/api/pith-number/V4YYYIN3Y4JGLOBJDTAUHAW5RK/events.json","paper":"https://pith.science/paper/V4YYYIN3"},"agent_actions":{"view_html":"https://pith.science/pith/V4YYYIN3Y4JGLOBJDTAUHAW5RK","download_json":"https://pith.science/pith/V4YYYIN3Y4JGLOBJDTAUHAW5RK.json","view_paper":"https://pith.science/paper/V4YYYIN3","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2605.24556&json=true","fetch_graph":"https://pith.science/api/pith-number/V4YYYIN3Y4JGLOBJDTAUHAW5RK/graph.json","fetch_events":"https://pith.science/api/pith-number/V4YYYIN3Y4JGLOBJDTAUHAW5RK/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/V4YYYIN3Y4JGLOBJDTAUHAW5RK/action/timestamp_anchor","attest_storage":"https://pith.science/pith/V4YYYIN3Y4JGLOBJDTAUHAW5RK/action/storage_attestation","attest_author":"https://pith.science/pith/V4YYYIN3Y4JGLOBJDTAUHAW5RK/action/author_attestation","sign_citation":"https://pith.science/pith/V4YYYIN3Y4JGLOBJDTAUHAW5RK/action/citation_signature","submit_replication":"https://pith.science/pith/V4YYYIN3Y4JGLOBJDTAUHAW5RK/action/replication_record"}},"created_at":"2026-05-26T01:03:46.206215+00:00","updated_at":"2026-05-26T01:03:46.206215+00:00"}