{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2015:CR6WHE4XC2ESSEPCONO7YYMI2B","short_pith_number":"pith:CR6WHE4X","schema_version":"1.0","canonical_sha256":"147d63939716892911e2735dfc6188d0502a06eba9b5d663ed0101671d79cf73","source":{"kind":"arxiv","id":"1509.04473","version":1},"attestation_state":"computed","paper":{"title":"Splitting Compounds by Semantic Analogy","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Joachim Daiber, Lautaro Quiroz, Roger Wechsler, Stella Frank","submitted_at":"2015-09-15T10:03:35Z","abstract_excerpt":"Compounding is a highly productive word-formation process in some languages that is often problematic for natural language processing applications. In this paper, we investigate whether distributional semantics in the form of word embeddings can enable a deeper, i.e., more knowledge-rich, processing of compounds than the standard string-based methods. We present an unsupervised approach that exploits regularities in the semantic vector space (based on analogies such as \"bookshop is to shop as bookshelf is to shelf\") to produce compound analyses of high quality. A subsequent compound splitting "},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1509.04473","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2015-09-15T10:03:35Z","cross_cats_sorted":[],"title_canon_sha256":"3b4ae93eec111226d30b4ecf11e1fa00c23f301889834e47c3d852846480a57f","abstract_canon_sha256":"9a70c6ad22b3191dbeffc538cd827258e323ae552ddb63ba3df53f8396b2b3a2"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T01:32:58.818089Z","signature_b64":"k3+/XLo1kQRyfFmogmpJ/T/C0pSEuhPZcwHMBuD0Y6zNOnY2BS8Pm0darCHBjm239XdKRTVuByuMXQBswLCFAQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"147d63939716892911e2735dfc6188d0502a06eba9b5d663ed0101671d79cf73","last_reissued_at":"2026-05-18T01:32:58.817537Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T01:32:58.817537Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Splitting Compounds by Semantic Analogy","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Joachim Daiber, Lautaro Quiroz, Roger Wechsler, Stella Frank","submitted_at":"2015-09-15T10:03:35Z","abstract_excerpt":"Compounding is a highly productive word-formation process in some languages that is often problematic for natural language processing applications. In this paper, we investigate whether distributional semantics in the form of word embeddings can enable a deeper, i.e., more knowledge-rich, processing of compounds than the standard string-based methods. We present an unsupervised approach that exploits regularities in the semantic vector space (based on analogies such as \"bookshop is to shop as bookshelf is to shelf\") to produce compound analyses of high quality. A subsequent compound splitting "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1509.04473","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1509.04473","created_at":"2026-05-18T01:32:58.817613+00:00"},{"alias_kind":"arxiv_version","alias_value":"1509.04473v1","created_at":"2026-05-18T01:32:58.817613+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1509.04473","created_at":"2026-05-18T01:32:58.817613+00:00"},{"alias_kind":"pith_short_12","alias_value":"CR6WHE4XC2ES","created_at":"2026-05-18T12:29:17.054201+00:00"},{"alias_kind":"pith_short_16","alias_value":"CR6WHE4XC2ESSEPC","created_at":"2026-05-18T12:29:17.054201+00:00"},{"alias_kind":"pith_short_8","alias_value":"CR6WHE4X","created_at":"2026-05-18T12:29:17.054201+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/CR6WHE4XC2ESSEPCONO7YYMI2B","json":"https://pith.science/pith/CR6WHE4XC2ESSEPCONO7YYMI2B.json","graph_json":"https://pith.science/api/pith-number/CR6WHE4XC2ESSEPCONO7YYMI2B/graph.json","events_json":"https://pith.science/api/pith-number/CR6WHE4XC2ESSEPCONO7YYMI2B/events.json","paper":"https://pith.science/paper/CR6WHE4X"},"agent_actions":{"view_html":"https://pith.science/pith/CR6WHE4XC2ESSEPCONO7YYMI2B","download_json":"https://pith.science/pith/CR6WHE4XC2ESSEPCONO7YYMI2B.json","view_paper":"https://pith.science/paper/CR6WHE4X","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1509.04473&json=true","fetch_graph":"https://pith.science/api/pith-number/CR6WHE4XC2ESSEPCONO7YYMI2B/graph.json","fetch_events":"https://pith.science/api/pith-number/CR6WHE4XC2ESSEPCONO7YYMI2B/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/CR6WHE4XC2ESSEPCONO7YYMI2B/action/timestamp_anchor","attest_storage":"https://pith.science/pith/CR6WHE4XC2ESSEPCONO7YYMI2B/action/storage_attestation","attest_author":"https://pith.science/pith/CR6WHE4XC2ESSEPCONO7YYMI2B/action/author_attestation","sign_citation":"https://pith.science/pith/CR6WHE4XC2ESSEPCONO7YYMI2B/action/citation_signature","submit_replication":"https://pith.science/pith/CR6WHE4XC2ESSEPCONO7YYMI2B/action/replication_record"}},"created_at":"2026-05-18T01:32:58.817613+00:00","updated_at":"2026-05-18T01:32:58.817613+00:00"}