{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:KOVPAO2N5ERMBHZP4YL5I3L4UI","short_pith_number":"pith:KOVPAO2N","canonical_record":{"source":{"id":"2605.14301","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-05-14T03:07:48Z","cross_cats_sorted":["stat.ML"],"title_canon_sha256":"b087d1cb81493fc05613a8cd0ecb5922ad5556792eeb179f17772047cb07f282","abstract_canon_sha256":"1d0a48852d500965c63bf9a453e0da1ce8fa32d991ae8bb9c9bc1b91f990597e"},"schema_version":"1.0"},"canonical_sha256":"53aaf03b4de922c09f2fe617d46d7ca21a3cfd1b2b80f3c833d8fbf0a1e3d70d","source":{"kind":"arxiv","id":"2605.14301","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.14301","created_at":"2026-05-17T23:39:10Z"},{"alias_kind":"arxiv_version","alias_value":"2605.14301v1","created_at":"2026-05-17T23:39:10Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.14301","created_at":"2026-05-17T23:39:10Z"},{"alias_kind":"pith_short_12","alias_value":"KOVPAO2N5ERM","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_16","alias_value":"KOVPAO2N5ERMBHZP","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_8","alias_value":"KOVPAO2N","created_at":"2026-05-18T12:33:37Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:KOVPAO2N5ERMBHZP4YL5I3L4UI","target":"record","payload":{"canonical_record":{"source":{"id":"2605.14301","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-05-14T03:07:48Z","cross_cats_sorted":["stat.ML"],"title_canon_sha256":"b087d1cb81493fc05613a8cd0ecb5922ad5556792eeb179f17772047cb07f282","abstract_canon_sha256":"1d0a48852d500965c63bf9a453e0da1ce8fa32d991ae8bb9c9bc1b91f990597e"},"schema_version":"1.0"},"canonical_sha256":"53aaf03b4de922c09f2fe617d46d7ca21a3cfd1b2b80f3c833d8fbf0a1e3d70d","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:39:10.094277Z","signature_b64":"OAodBPQbod3Jea9965kcBZYTyUDXFaOxSXn2jNl/uPeUO0Bla2SHa6VW3yBmTPVbTvdG0JMrdx3VimBcylqpAQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"53aaf03b4de922c09f2fe617d46d7ca21a3cfd1b2b80f3c833d8fbf0a1e3d70d","last_reissued_at":"2026-05-17T23:39:10.093765Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:39:10.093765Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2605.14301","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:39:10Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"Bax/uij8r8LSkiVu1brv8XWNYac7Y8KO+/1dj2FspHbUOHrkirqLBgvvXjU0drncRbH5YGR759tWbS5jmQo7AQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-26T02:13:52.838208Z"},"content_sha256":"bf337bfee159a177db907f2bab9c89994128e1c4e33a5e3bc73cd0da341d0530","schema_version":"1.0","event_id":"sha256:bf337bfee159a177db907f2bab9c89994128e1c4e33a5e3bc73cd0da341d0530"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:KOVPAO2N5ERMBHZP4YL5I3L4UI","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Language-Induced Priors for Domain Adaptation","license":"http://creativecommons.org/licenses/by/4.0/","headline":"Language-induced priors from textual descriptions let domain adaptation match oracle performance when target data is scarce.","cross_cats":["stat.ML"],"primary_cat":"cs.LG","authors_text":"Jiayu Zhou, Qiyuan Chen, Raed Al Kontar","submitted_at":"2026-05-14T03:07:48Z","abstract_excerpt":"Domain adaptation faces a fundamental paradox in the cold-start regime. When target data is scarce, statistical methods fail to distinguish relevant source domains from irrelevant ones, which often leads to negative transfer. In this paper, we address this challenge by leveraging expert textual descriptions of the target domain, a resource that is often available but overlooked. We propose a probabilistic framework that translates these semantic descriptions into a choice model, namely a Language-Induced Prior (LIP), that learns the preferences from a pretrained Large Language Model (LLM). The"},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"We prove that the estimator roughly matches an oracle cold-start MSE under a correct prior, while remaining asymptotically consistent regardless of the quality of the LIP.","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"The LLM-derived choice model accurately captures source relevance from textual descriptions; if this mapping is systematically biased, the early-stage guidance in the EM algorithm can degrade performance before data overrides it.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"Language-Induced Priors from LLMs guide source selection in cold-start domain adaptation through an EM algorithm, matching oracle MSE under a correct prior and remaining asymptotically consistent.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"Language-induced priors from textual descriptions let domain adaptation match oracle performance when target data is scarce.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"043c1e4028103032072e58b02a236f7a4be497cd9cd3fc7844dbca1833df9867"},"source":{"id":"2605.14301","kind":"arxiv","version":1},"verdict":{"id":"02565690-c93b-4b20-8697-ca46aaab8484","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-15T02:04:11.039074Z","strongest_claim":"We prove that the estimator roughly matches an oracle cold-start MSE under a correct prior, while remaining asymptotically consistent regardless of the quality of the LIP.","one_line_summary":"Language-Induced Priors from LLMs guide source selection in cold-start domain adaptation through an EM algorithm, matching oracle MSE under a correct prior and remaining asymptotically consistent.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"The LLM-derived choice model accurately captures source relevance from textual descriptions; if this mapping is systematically biased, the early-stage guidance in the EM algorithm can degrade performance before data overrides it.","pith_extraction_headline":"Language-induced priors from textual descriptions let domain adaptation match oracle performance when target data is scarce."},"references":{"count":38,"sample":[{"doi":"","year":2022,"title":"IEEE/CAA Journal of Automatica Sinica , volume=","work_id":"bf8ab5a6-eb3d-4990-918d-0b8fd66460e3","ref_index":1,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":1998,"title":"Asymptotic Statistics , author=. 1998 , publisher=","work_id":"04d112d2-91c2-435d-b9e8-d7b5d526fe98","ref_index":2,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2013,"title":"2013 International Conference on Machine Learning and Cybernetics , volume=","work_id":"845e3471-4275-4566-92bc-cd0fdfc1fc1b","ref_index":3,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":null,"title":"Advances in neural information processing systems , volume=","work_id":"b144ac0e-8b4f-43dd-8d60-7cc4685e38d2","ref_index":4,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":null,"title":"Advances in neural information processing systems , volume=","work_id":"83eb271d-29e5-42a9-a1aa-11ef7f5b0f62","ref_index":5,"cited_arxiv_id":"","is_internal_anchor":false}],"resolved_work":38,"snapshot_sha256":"9e3ba7038ec2872881b4f4e340146b32d4fc80b9162ac40c17ea12ee57d10f05","internal_anchors":1},"formal_canon":{"evidence_count":2,"snapshot_sha256":"a3ab27dccdb3f43f91240278f5cde60e14faa7d581c6dd461735b538457bb12f"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":"02565690-c93b-4b20-8697-ca46aaab8484"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:39:10Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"xv98EhSvyEaanMSEIGkio9VmH24pSJQ0K0FNvoIYSfTwRnjkoxrnM/5NmnL/L8Iz9zNIXJSYksa+bz+lnZs5Aw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-26T02:13:52.838798Z"},"content_sha256":"3077ae187ab5b35d270b3ff8c4fa79bf4ba1ef79c0a0016ea9d6f5cc1bef5d54","schema_version":"1.0","event_id":"sha256:3077ae187ab5b35d270b3ff8c4fa79bf4ba1ef79c0a0016ea9d6f5cc1bef5d54"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/KOVPAO2N5ERMBHZP4YL5I3L4UI/bundle.json","state_url":"https://pith.science/pith/KOVPAO2N5ERMBHZP4YL5I3L4UI/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/KOVPAO2N5ERMBHZP4YL5I3L4UI/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-26T02:13:52Z","links":{"resolver":"https://pith.science/pith/KOVPAO2N5ERMBHZP4YL5I3L4UI","bundle":"https://pith.science/pith/KOVPAO2N5ERMBHZP4YL5I3L4UI/bundle.json","state":"https://pith.science/pith/KOVPAO2N5ERMBHZP4YL5I3L4UI/state.json","well_known_bundle":"https://pith.science/.well-known/pith/KOVPAO2N5ERMBHZP4YL5I3L4UI/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:KOVPAO2N5ERMBHZP4YL5I3L4UI","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"1d0a48852d500965c63bf9a453e0da1ce8fa32d991ae8bb9c9bc1b91f990597e","cross_cats_sorted":["stat.ML"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-05-14T03:07:48Z","title_canon_sha256":"b087d1cb81493fc05613a8cd0ecb5922ad5556792eeb179f17772047cb07f282"},"schema_version":"1.0","source":{"id":"2605.14301","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.14301","created_at":"2026-05-17T23:39:10Z"},{"alias_kind":"arxiv_version","alias_value":"2605.14301v1","created_at":"2026-05-17T23:39:10Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.14301","created_at":"2026-05-17T23:39:10Z"},{"alias_kind":"pith_short_12","alias_value":"KOVPAO2N5ERM","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_16","alias_value":"KOVPAO2N5ERMBHZP","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_8","alias_value":"KOVPAO2N","created_at":"2026-05-18T12:33:37Z"}],"graph_snapshots":[{"event_id":"sha256:3077ae187ab5b35d270b3ff8c4fa79bf4ba1ef79c0a0016ea9d6f5cc1bef5d54","target":"graph","created_at":"2026-05-17T23:39:10Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"We prove that the estimator roughly matches an oracle cold-start MSE under a correct prior, while remaining asymptotically consistent regardless of the quality of the LIP."},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"The LLM-derived choice model accurately captures source relevance from textual descriptions; if this mapping is systematically biased, the early-stage guidance in the EM algorithm can degrade performance before data overrides it."},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"Language-Induced Priors from LLMs guide source selection in cold-start domain adaptation through an EM algorithm, matching oracle MSE under a correct prior and remaining asymptotically consistent."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"Language-induced priors from textual descriptions let domain adaptation match oracle performance when target data is scarce."}],"snapshot_sha256":"043c1e4028103032072e58b02a236f7a4be497cd9cd3fc7844dbca1833df9867"},"formal_canon":{"evidence_count":2,"snapshot_sha256":"a3ab27dccdb3f43f91240278f5cde60e14faa7d581c6dd461735b538457bb12f"},"paper":{"abstract_excerpt":"Domain adaptation faces a fundamental paradox in the cold-start regime. When target data is scarce, statistical methods fail to distinguish relevant source domains from irrelevant ones, which often leads to negative transfer. In this paper, we address this challenge by leveraging expert textual descriptions of the target domain, a resource that is often available but overlooked. We propose a probabilistic framework that translates these semantic descriptions into a choice model, namely a Language-Induced Prior (LIP), that learns the preferences from a pretrained Large Language Model (LLM). The","authors_text":"Jiayu Zhou, Qiyuan Chen, Raed Al Kontar","cross_cats":["stat.ML"],"headline":"Language-induced priors from textual descriptions let domain adaptation match oracle performance when target data is scarce.","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-05-14T03:07:48Z","title":"Language-Induced Priors for Domain Adaptation"},"references":{"count":38,"internal_anchors":1,"resolved_work":38,"sample":[{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":1,"title":"IEEE/CAA Journal of Automatica Sinica , volume=","work_id":"bf8ab5a6-eb3d-4990-918d-0b8fd66460e3","year":2022},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":2,"title":"Asymptotic Statistics , author=. 1998 , publisher=","work_id":"04d112d2-91c2-435d-b9e8-d7b5d526fe98","year":1998},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":3,"title":"2013 International Conference on Machine Learning and Cybernetics , volume=","work_id":"845e3471-4275-4566-92bc-cd0fdfc1fc1b","year":2013},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":4,"title":"Advances in neural information processing systems , volume=","work_id":"b144ac0e-8b4f-43dd-8d60-7cc4685e38d2","year":null},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":5,"title":"Advances in neural information processing systems , volume=","work_id":"83eb271d-29e5-42a9-a1aa-11ef7f5b0f62","year":null}],"snapshot_sha256":"9e3ba7038ec2872881b4f4e340146b32d4fc80b9162ac40c17ea12ee57d10f05"},"source":{"id":"2605.14301","kind":"arxiv","version":1},"verdict":{"created_at":"2026-05-15T02:04:11.039074Z","id":"02565690-c93b-4b20-8697-ca46aaab8484","model_set":{"reader":"grok-4.3"},"one_line_summary":"Language-Induced Priors from LLMs guide source selection in cold-start domain adaptation through an EM algorithm, matching oracle MSE under a correct prior and remaining asymptotically consistent.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"Language-induced priors from textual descriptions let domain adaptation match oracle performance when target data is scarce.","strongest_claim":"We prove that the estimator roughly matches an oracle cold-start MSE under a correct prior, while remaining asymptotically consistent regardless of the quality of the LIP.","weakest_assumption":"The LLM-derived choice model accurately captures source relevance from textual descriptions; if this mapping is systematically biased, the early-stage guidance in the EM algorithm can degrade performance before data overrides it."}},"verdict_id":"02565690-c93b-4b20-8697-ca46aaab8484"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:bf337bfee159a177db907f2bab9c89994128e1c4e33a5e3bc73cd0da341d0530","target":"record","created_at":"2026-05-17T23:39:10Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"1d0a48852d500965c63bf9a453e0da1ce8fa32d991ae8bb9c9bc1b91f990597e","cross_cats_sorted":["stat.ML"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-05-14T03:07:48Z","title_canon_sha256":"b087d1cb81493fc05613a8cd0ecb5922ad5556792eeb179f17772047cb07f282"},"schema_version":"1.0","source":{"id":"2605.14301","kind":"arxiv","version":1}},"canonical_sha256":"53aaf03b4de922c09f2fe617d46d7ca21a3cfd1b2b80f3c833d8fbf0a1e3d70d","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"53aaf03b4de922c09f2fe617d46d7ca21a3cfd1b2b80f3c833d8fbf0a1e3d70d","first_computed_at":"2026-05-17T23:39:10.093765Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:39:10.093765Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"OAodBPQbod3Jea9965kcBZYTyUDXFaOxSXn2jNl/uPeUO0Bla2SHa6VW3yBmTPVbTvdG0JMrdx3VimBcylqpAQ==","signature_status":"signed_v1","signed_at":"2026-05-17T23:39:10.094277Z","signed_message":"canonical_sha256_bytes"},"source_id":"2605.14301","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:bf337bfee159a177db907f2bab9c89994128e1c4e33a5e3bc73cd0da341d0530","sha256:3077ae187ab5b35d270b3ff8c4fa79bf4ba1ef79c0a0016ea9d6f5cc1bef5d54"],"state_sha256":"3d48c3e828f1faa6cb293ee12761dd5c7a949b46c72bd8353b92e3596deb3be2"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"5FoR1qMZGzEhS8JM7nrgSUcuW3P1NtnXR9vyscVLHqbmq5mTUBXDsNCXVT/EbzGkxuqmUB1YH6TtyDUX2b/kCQ==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-26T02:13:52.843373Z","bundle_sha256":"e289b9a28de3706ebe983c6d0d2fc49d4b73c9b011bf491ddb639ed634f32698"}}