{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2019:HWD2LSNEC4ER42FXP6VDUET7BG","short_pith_number":"pith:HWD2LSNE","schema_version":"1.0","canonical_sha256":"3d87a5c9a417091e68b77faa3a127f0992b5b1d3bbc264c5b786757850fc00b3","source":{"kind":"arxiv","id":"1904.05426","version":1},"attestation_state":"computed","paper":{"title":"A Grounded Unsupervised Universal Part-of-Speech Tagger for Low-Resource Languages","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","cs.LG"],"primary_cat":"cs.CL","authors_text":"Heng Ji, Jonathan May, Ronald Cardenas, Ying Lin","submitted_at":"2019-04-10T20:22:31Z","abstract_excerpt":"Unsupervised part of speech (POS) tagging is often framed as a clustering problem, but practical taggers need to \\textit{ground} their clusters as well. Grounding generally requires reference labeled data, a luxury a low-resource language might not have. In this work, we describe an approach for low-resource unsupervised POS tagging that yields fully grounded output and requires no labeled training data. We find the classic method of Brown et al. (1992) clusters well in our use case and employ a decipherment-based approach to grounding. This approach presumes a sequence of cluster IDs is a `ci"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1904.05426","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2019-04-10T20:22:31Z","cross_cats_sorted":["cs.AI","cs.LG"],"title_canon_sha256":"a57ecc0f5888081f4199d9c40cf1c65c20b2e369be1fec91f5cdec2b4ba29e6a","abstract_canon_sha256":"8f344c34413f0513e6f77af9a781654de4f5d840e230016c5b333aec82f3979e"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:48:49.558506Z","signature_b64":"yAUhAw3VL84+nOUVtgfzRJo+JSH4mo0muZRFa3+b6TlUvRXIztXl0XUtAjUHVzWSMbo+QIkAM73rwzTddA8SCg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"3d87a5c9a417091e68b77faa3a127f0992b5b1d3bbc264c5b786757850fc00b3","last_reissued_at":"2026-05-17T23:48:49.557852Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:48:49.557852Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"A Grounded Unsupervised Universal Part-of-Speech Tagger for Low-Resource Languages","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","cs.LG"],"primary_cat":"cs.CL","authors_text":"Heng Ji, Jonathan May, Ronald Cardenas, Ying Lin","submitted_at":"2019-04-10T20:22:31Z","abstract_excerpt":"Unsupervised part of speech (POS) tagging is often framed as a clustering problem, but practical taggers need to \\textit{ground} their clusters as well. Grounding generally requires reference labeled data, a luxury a low-resource language might not have. In this work, we describe an approach for low-resource unsupervised POS tagging that yields fully grounded output and requires no labeled training data. We find the classic method of Brown et al. (1992) clusters well in our use case and employ a decipherment-based approach to grounding. This approach presumes a sequence of cluster IDs is a `ci"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1904.05426","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1904.05426","created_at":"2026-05-17T23:48:49.557948+00:00"},{"alias_kind":"arxiv_version","alias_value":"1904.05426v1","created_at":"2026-05-17T23:48:49.557948+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1904.05426","created_at":"2026-05-17T23:48:49.557948+00:00"},{"alias_kind":"pith_short_12","alias_value":"HWD2LSNEC4ER","created_at":"2026-05-18T12:33:18.533446+00:00"},{"alias_kind":"pith_short_16","alias_value":"HWD2LSNEC4ER42FX","created_at":"2026-05-18T12:33:18.533446+00:00"},{"alias_kind":"pith_short_8","alias_value":"HWD2LSNE","created_at":"2026-05-18T12:33:18.533446+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/HWD2LSNEC4ER42FXP6VDUET7BG","json":"https://pith.science/pith/HWD2LSNEC4ER42FXP6VDUET7BG.json","graph_json":"https://pith.science/api/pith-number/HWD2LSNEC4ER42FXP6VDUET7BG/graph.json","events_json":"https://pith.science/api/pith-number/HWD2LSNEC4ER42FXP6VDUET7BG/events.json","paper":"https://pith.science/paper/HWD2LSNE"},"agent_actions":{"view_html":"https://pith.science/pith/HWD2LSNEC4ER42FXP6VDUET7BG","download_json":"https://pith.science/pith/HWD2LSNEC4ER42FXP6VDUET7BG.json","view_paper":"https://pith.science/paper/HWD2LSNE","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1904.05426&json=true","fetch_graph":"https://pith.science/api/pith-number/HWD2LSNEC4ER42FXP6VDUET7BG/graph.json","fetch_events":"https://pith.science/api/pith-number/HWD2LSNEC4ER42FXP6VDUET7BG/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/HWD2LSNEC4ER42FXP6VDUET7BG/action/timestamp_anchor","attest_storage":"https://pith.science/pith/HWD2LSNEC4ER42FXP6VDUET7BG/action/storage_attestation","attest_author":"https://pith.science/pith/HWD2LSNEC4ER42FXP6VDUET7BG/action/author_attestation","sign_citation":"https://pith.science/pith/HWD2LSNEC4ER42FXP6VDUET7BG/action/citation_signature","submit_replication":"https://pith.science/pith/HWD2LSNEC4ER42FXP6VDUET7BG/action/replication_record"}},"created_at":"2026-05-17T23:48:49.557948+00:00","updated_at":"2026-05-17T23:48:49.557948+00:00"}