{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:JDRYBKYSRWIFZWDPSBPESCVVDV","short_pith_number":"pith:JDRYBKYS","schema_version":"1.0","canonical_sha256":"48e380ab128d905cd86f905e490ab51d70403f65c1ca02fae8ee8ab3520b8462","source":{"kind":"arxiv","id":"2605.29271","version":1},"attestation_state":"computed","paper":{"title":"CoHyDE: Iterative Co-Training of LLM Rewriter & Dense Encoder for Tool Retrieval","license":"http://creativecommons.org/licenses/by-sa/4.0/","headline":"","cross_cats":["cs.IR","cs.LG"],"primary_cat":"cs.AI","authors_text":"Ashutosh Hathidara, Sebastian Schreiber, Vaishali Senthil","submitted_at":"2026-05-28T02:41:30Z","abstract_excerpt":"Tool retrieval over large API catalogs is a core bottleneck for LLM agents: user queries arrive in colloquial, often underspecified language, while the catalog uses technical API vocabulary that no fixed encoder can bridge on its own. The two dominant training approaches, contrastive encoder fine-tuning and HyDE-style query expansion with a frozen LLM, address this problem from opposite ends and fail in complementary directions: the fine-tuned encoder excels when the query's surface form already matches the catalog but collapses when it does not, while zero-shot HyDE is more robust to underspe"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2605.29271","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by-sa/4.0/","primary_cat":"cs.AI","submitted_at":"2026-05-28T02:41:30Z","cross_cats_sorted":["cs.IR","cs.LG"],"title_canon_sha256":"71b14a0132196355279ce44ae0e9d361ca08acfa439accc7df6111bef3625138","abstract_canon_sha256":"9015edffca58ac573d1c66196ed2858bce75f5a98c23a61fab74463d47a40eb4"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-29T01:05:28.376714Z","signature_b64":"OghvQf8ctJWeBdgLuGb9GhVQhJO/M5LBJ50bZaKH9YYoZkFArgc4jDpBI7rWM71nBeluUb3xWThRphNXLUhZBA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"48e380ab128d905cd86f905e490ab51d70403f65c1ca02fae8ee8ab3520b8462","last_reissued_at":"2026-05-29T01:05:28.375953Z","signature_status":"signed_v1","first_computed_at":"2026-05-29T01:05:28.375953Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"CoHyDE: Iterative Co-Training of LLM Rewriter & Dense Encoder for Tool Retrieval","license":"http://creativecommons.org/licenses/by-sa/4.0/","headline":"","cross_cats":["cs.IR","cs.LG"],"primary_cat":"cs.AI","authors_text":"Ashutosh Hathidara, Sebastian Schreiber, Vaishali Senthil","submitted_at":"2026-05-28T02:41:30Z","abstract_excerpt":"Tool retrieval over large API catalogs is a core bottleneck for LLM agents: user queries arrive in colloquial, often underspecified language, while the catalog uses technical API vocabulary that no fixed encoder can bridge on its own. The two dominant training approaches, contrastive encoder fine-tuning and HyDE-style query expansion with a frozen LLM, address this problem from opposite ends and fail in complementary directions: the fine-tuned encoder excels when the query's surface form already matches the catalog but collapses when it does not, while zero-shot HyDE is more robust to underspe"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.29271","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2605.29271/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2605.29271","created_at":"2026-05-29T01:05:28.376068+00:00"},{"alias_kind":"arxiv_version","alias_value":"2605.29271v1","created_at":"2026-05-29T01:05:28.376068+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.29271","created_at":"2026-05-29T01:05:28.376068+00:00"},{"alias_kind":"pith_short_12","alias_value":"JDRYBKYSRWIF","created_at":"2026-05-29T01:05:28.376068+00:00"},{"alias_kind":"pith_short_16","alias_value":"JDRYBKYSRWIFZWDP","created_at":"2026-05-29T01:05:28.376068+00:00"},{"alias_kind":"pith_short_8","alias_value":"JDRYBKYS","created_at":"2026-05-29T01:05:28.376068+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/JDRYBKYSRWIFZWDPSBPESCVVDV","json":"https://pith.science/pith/JDRYBKYSRWIFZWDPSBPESCVVDV.json","graph_json":"https://pith.science/api/pith-number/JDRYBKYSRWIFZWDPSBPESCVVDV/graph.json","events_json":"https://pith.science/api/pith-number/JDRYBKYSRWIFZWDPSBPESCVVDV/events.json","paper":"https://pith.science/paper/JDRYBKYS"},"agent_actions":{"view_html":"https://pith.science/pith/JDRYBKYSRWIFZWDPSBPESCVVDV","download_json":"https://pith.science/pith/JDRYBKYSRWIFZWDPSBPESCVVDV.json","view_paper":"https://pith.science/paper/JDRYBKYS","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2605.29271&json=true","fetch_graph":"https://pith.science/api/pith-number/JDRYBKYSRWIFZWDPSBPESCVVDV/graph.json","fetch_events":"https://pith.science/api/pith-number/JDRYBKYSRWIFZWDPSBPESCVVDV/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/JDRYBKYSRWIFZWDPSBPESCVVDV/action/timestamp_anchor","attest_storage":"https://pith.science/pith/JDRYBKYSRWIFZWDPSBPESCVVDV/action/storage_attestation","attest_author":"https://pith.science/pith/JDRYBKYSRWIFZWDPSBPESCVVDV/action/author_attestation","sign_citation":"https://pith.science/pith/JDRYBKYSRWIFZWDPSBPESCVVDV/action/citation_signature","submit_replication":"https://pith.science/pith/JDRYBKYSRWIFZWDPSBPESCVVDV/action/replication_record"}},"created_at":"2026-05-29T01:05:28.376068+00:00","updated_at":"2026-05-29T01:05:28.376068+00:00"}