{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:OLMII6QK6BYH6G52FCWHZS3XML","short_pith_number":"pith:OLMII6QK","schema_version":"1.0","canonical_sha256":"72d8847a0af0707f1bba28ac7ccb7762fa958fd762c95aeac34a6fee0b3a1111","source":{"kind":"arxiv","id":"2605.21812","version":1},"attestation_state":"computed","paper":{"title":"Bridging the Cold-Start Gap: LLM-Powered Synthetic Data Generation for Natural Language Search at Airbnb","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.IR","authors_text":"Dillon Davis, Hao Li, Huiji Gao, Kedar Bellare, Malay Haldar, Sanjeev Katariya, Soumyadip Banerjee, Stephanie Moyerman, Weiwei Guo, Wendy Ran Wei, Xiaowei Liu, Xueyin Chen","submitted_at":"2026-05-20T23:18:49Z","abstract_excerpt":"Deploying natural language search systems presents a critical cold-start challenge: no real user queries to learn linguistic patterns, and no relevance labels to train ranking models. We present a framework for generating synthetic queries and labels using large language models (LLMs), powering model training and evaluation for Airbnb's natural language search.\n  For query generation, we combine contrastive listing pairs from booking sessions with seed queries from user research to balance realism and diversity, enabling a cold-to-warm start transition as real user data becomes available. For "},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2605.21812","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.IR","submitted_at":"2026-05-20T23:18:49Z","cross_cats_sorted":[],"title_canon_sha256":"34b4bb61c4c66527fe0b8cc65d5b205f050cfd7d8fd0c3e32ff0faa8c62468ff","abstract_canon_sha256":"efa680506a5b40e4fbe35f37460ec37fc32c666d993453547d7242baf3e696b3"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-22T01:03:33.516420Z","signature_b64":"j4tIE8CtnXOwKDahDbo2NiN42oDr2ed5zpr0S+z9bxeGI1Ak1j/PXCkJlCYexZKe3QEJS0sf/gQUXWVRlq5CAg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"72d8847a0af0707f1bba28ac7ccb7762fa958fd762c95aeac34a6fee0b3a1111","last_reissued_at":"2026-05-22T01:03:33.515664Z","signature_status":"signed_v1","first_computed_at":"2026-05-22T01:03:33.515664Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Bridging the Cold-Start Gap: LLM-Powered Synthetic Data Generation for Natural Language Search at Airbnb","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.IR","authors_text":"Dillon Davis, Hao Li, Huiji Gao, Kedar Bellare, Malay Haldar, Sanjeev Katariya, Soumyadip Banerjee, Stephanie Moyerman, Weiwei Guo, Wendy Ran Wei, Xiaowei Liu, Xueyin Chen","submitted_at":"2026-05-20T23:18:49Z","abstract_excerpt":"Deploying natural language search systems presents a critical cold-start challenge: no real user queries to learn linguistic patterns, and no relevance labels to train ranking models. We present a framework for generating synthetic queries and labels using large language models (LLMs), powering model training and evaluation for Airbnb's natural language search.\n  For query generation, we combine contrastive listing pairs from booking sessions with seed queries from user research to balance realism and diversity, enabling a cold-to-warm start transition as real user data becomes available. For "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.21812","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2605.21812/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2605.21812","created_at":"2026-05-22T01:03:33.515782+00:00"},{"alias_kind":"arxiv_version","alias_value":"2605.21812v1","created_at":"2026-05-22T01:03:33.515782+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.21812","created_at":"2026-05-22T01:03:33.515782+00:00"},{"alias_kind":"pith_short_12","alias_value":"OLMII6QK6BYH","created_at":"2026-05-22T01:03:33.515782+00:00"},{"alias_kind":"pith_short_16","alias_value":"OLMII6QK6BYH6G52","created_at":"2026-05-22T01:03:33.515782+00:00"},{"alias_kind":"pith_short_8","alias_value":"OLMII6QK","created_at":"2026-05-22T01:03:33.515782+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/OLMII6QK6BYH6G52FCWHZS3XML","json":"https://pith.science/pith/OLMII6QK6BYH6G52FCWHZS3XML.json","graph_json":"https://pith.science/api/pith-number/OLMII6QK6BYH6G52FCWHZS3XML/graph.json","events_json":"https://pith.science/api/pith-number/OLMII6QK6BYH6G52FCWHZS3XML/events.json","paper":"https://pith.science/paper/OLMII6QK"},"agent_actions":{"view_html":"https://pith.science/pith/OLMII6QK6BYH6G52FCWHZS3XML","download_json":"https://pith.science/pith/OLMII6QK6BYH6G52FCWHZS3XML.json","view_paper":"https://pith.science/paper/OLMII6QK","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2605.21812&json=true","fetch_graph":"https://pith.science/api/pith-number/OLMII6QK6BYH6G52FCWHZS3XML/graph.json","fetch_events":"https://pith.science/api/pith-number/OLMII6QK6BYH6G52FCWHZS3XML/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/OLMII6QK6BYH6G52FCWHZS3XML/action/timestamp_anchor","attest_storage":"https://pith.science/pith/OLMII6QK6BYH6G52FCWHZS3XML/action/storage_attestation","attest_author":"https://pith.science/pith/OLMII6QK6BYH6G52FCWHZS3XML/action/author_attestation","sign_citation":"https://pith.science/pith/OLMII6QK6BYH6G52FCWHZS3XML/action/citation_signature","submit_replication":"https://pith.science/pith/OLMII6QK6BYH6G52FCWHZS3XML/action/replication_record"}},"created_at":"2026-05-22T01:03:33.515782+00:00","updated_at":"2026-05-22T01:03:33.515782+00:00"}