{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2014:OK4IWIQMRFUSS27UFH7MPO7IK3","short_pith_number":"pith:OK4IWIQM","canonical_record":{"source":{"id":"1401.6131","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2014-01-16T05:20:08Z","cross_cats_sorted":["cs.LG"],"title_canon_sha256":"a29d6086789db9074b1eae1358f0c396e56af9d3bafa667f512c312c38c61059","abstract_canon_sha256":"5192462e72536863231e2dd11b9997c73c0acdaa445cff6ef569867636f17ed3"},"schema_version":"1.0"},"canonical_sha256":"72b88b220c8969296bf429fec7bbe856f3587482d172953e778fc94c17aa1604","source":{"kind":"arxiv","id":"1401.6131","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1401.6131","created_at":"2026-05-18T03:01:22Z"},{"alias_kind":"arxiv_version","alias_value":"1401.6131v1","created_at":"2026-05-18T03:01:22Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1401.6131","created_at":"2026-05-18T03:01:22Z"},{"alias_kind":"pith_short_12","alias_value":"OK4IWIQMRFUS","created_at":"2026-05-18T12:28:41Z"},{"alias_kind":"pith_short_16","alias_value":"OK4IWIQMRFUSS27U","created_at":"2026-05-18T12:28:41Z"},{"alias_kind":"pith_short_8","alias_value":"OK4IWIQM","created_at":"2026-05-18T12:28:41Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2014:OK4IWIQMRFUSS27UFH7MPO7IK3","target":"record","payload":{"canonical_record":{"source":{"id":"1401.6131","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2014-01-16T05:20:08Z","cross_cats_sorted":["cs.LG"],"title_canon_sha256":"a29d6086789db9074b1eae1358f0c396e56af9d3bafa667f512c312c38c61059","abstract_canon_sha256":"5192462e72536863231e2dd11b9997c73c0acdaa445cff6ef569867636f17ed3"},"schema_version":"1.0"},"canonical_sha256":"72b88b220c8969296bf429fec7bbe856f3587482d172953e778fc94c17aa1604","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T03:01:22.708166Z","signature_b64":"qcAMs/c5AWDS6d+DuF2hTrZIlS0vUoIUvNrXA8vq1TQTD3/zbsk4nfcfaYRjNFRpKzMm+RpziV17G39KLzlzCw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"72b88b220c8969296bf429fec7bbe856f3587482d172953e778fc94c17aa1604","last_reissued_at":"2026-05-18T03:01:22.707585Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T03:01:22.707585Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1401.6131","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T03:01:22Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"YEhL61xu6sffvJn/9Rrk7FypaVxyVP7SLVNl6hC0Kyp3oDXj6CB1s7AvtrQKrJPG3RcQ7FtygwZ/BP2UHWWGDA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-27T16:22:07.134620Z"},"content_sha256":"3e31fd6480cfc9ca5e7c3a748c151e813327c061f7bf975c2eecde656b96fce7","schema_version":"1.0","event_id":"sha256:3e31fd6480cfc9ca5e7c3a748c151e813327c061f7bf975c2eecde656b96fce7"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2014:OK4IWIQMRFUSS27UFH7MPO7IK3","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Controlling Complexity in Part-of-Speech Induction","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.LG"],"primary_cat":"cs.CL","authors_text":"Ben Taskar, Fernando Pereira, Jo\\~ao V. Gra\\c{c}a, Kuzman Ganchev, Luisa Coheur","submitted_at":"2014-01-16T05:20:08Z","abstract_excerpt":"We consider the problem of fully unsupervised learning of grammatical (part-of-speech) categories from unlabeled text. The standard maximum-likelihood hidden Markov model for this task performs poorly, because of its weak inductive bias and large model capacity. We address this problem by refining the model and modifying the learning objective to control its capacity via para- metric and non-parametric constraints. Our approach enforces word-category association sparsity, adds morphological and orthographic features, and eliminates hard-to-estimate parameters for rare words. We develop an effi"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1401.6131","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T03:01:22Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"nv/lb27EpSLhpBf6LSlnscpTC6wDgkf7n+B5q/KdqhzdthldHS+Rc8aPuGBlZelrNGOt/sKq7zTwD27tG97wDg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-27T16:22:07.135231Z"},"content_sha256":"7736cd47568caaaa58cabf799f3a4ec23e8aff1d0c5944d9fda5523174a83872","schema_version":"1.0","event_id":"sha256:7736cd47568caaaa58cabf799f3a4ec23e8aff1d0c5944d9fda5523174a83872"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/OK4IWIQMRFUSS27UFH7MPO7IK3/bundle.json","state_url":"https://pith.science/pith/OK4IWIQMRFUSS27UFH7MPO7IK3/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/OK4IWIQMRFUSS27UFH7MPO7IK3/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-27T16:22:07Z","links":{"resolver":"https://pith.science/pith/OK4IWIQMRFUSS27UFH7MPO7IK3","bundle":"https://pith.science/pith/OK4IWIQMRFUSS27UFH7MPO7IK3/bundle.json","state":"https://pith.science/pith/OK4IWIQMRFUSS27UFH7MPO7IK3/state.json","well_known_bundle":"https://pith.science/.well-known/pith/OK4IWIQMRFUSS27UFH7MPO7IK3/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2014:OK4IWIQMRFUSS27UFH7MPO7IK3","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"5192462e72536863231e2dd11b9997c73c0acdaa445cff6ef569867636f17ed3","cross_cats_sorted":["cs.LG"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2014-01-16T05:20:08Z","title_canon_sha256":"a29d6086789db9074b1eae1358f0c396e56af9d3bafa667f512c312c38c61059"},"schema_version":"1.0","source":{"id":"1401.6131","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1401.6131","created_at":"2026-05-18T03:01:22Z"},{"alias_kind":"arxiv_version","alias_value":"1401.6131v1","created_at":"2026-05-18T03:01:22Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1401.6131","created_at":"2026-05-18T03:01:22Z"},{"alias_kind":"pith_short_12","alias_value":"OK4IWIQMRFUS","created_at":"2026-05-18T12:28:41Z"},{"alias_kind":"pith_short_16","alias_value":"OK4IWIQMRFUSS27U","created_at":"2026-05-18T12:28:41Z"},{"alias_kind":"pith_short_8","alias_value":"OK4IWIQM","created_at":"2026-05-18T12:28:41Z"}],"graph_snapshots":[{"event_id":"sha256:7736cd47568caaaa58cabf799f3a4ec23e8aff1d0c5944d9fda5523174a83872","target":"graph","created_at":"2026-05-18T03:01:22Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"We consider the problem of fully unsupervised learning of grammatical (part-of-speech) categories from unlabeled text. The standard maximum-likelihood hidden Markov model for this task performs poorly, because of its weak inductive bias and large model capacity. We address this problem by refining the model and modifying the learning objective to control its capacity via para- metric and non-parametric constraints. Our approach enforces word-category association sparsity, adds morphological and orthographic features, and eliminates hard-to-estimate parameters for rare words. We develop an effi","authors_text":"Ben Taskar, Fernando Pereira, Jo\\~ao V. Gra\\c{c}a, Kuzman Ganchev, Luisa Coheur","cross_cats":["cs.LG"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2014-01-16T05:20:08Z","title":"Controlling Complexity in Part-of-Speech Induction"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1401.6131","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:3e31fd6480cfc9ca5e7c3a748c151e813327c061f7bf975c2eecde656b96fce7","target":"record","created_at":"2026-05-18T03:01:22Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"5192462e72536863231e2dd11b9997c73c0acdaa445cff6ef569867636f17ed3","cross_cats_sorted":["cs.LG"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2014-01-16T05:20:08Z","title_canon_sha256":"a29d6086789db9074b1eae1358f0c396e56af9d3bafa667f512c312c38c61059"},"schema_version":"1.0","source":{"id":"1401.6131","kind":"arxiv","version":1}},"canonical_sha256":"72b88b220c8969296bf429fec7bbe856f3587482d172953e778fc94c17aa1604","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"72b88b220c8969296bf429fec7bbe856f3587482d172953e778fc94c17aa1604","first_computed_at":"2026-05-18T03:01:22.707585Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T03:01:22.707585Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"qcAMs/c5AWDS6d+DuF2hTrZIlS0vUoIUvNrXA8vq1TQTD3/zbsk4nfcfaYRjNFRpKzMm+RpziV17G39KLzlzCw==","signature_status":"signed_v1","signed_at":"2026-05-18T03:01:22.708166Z","signed_message":"canonical_sha256_bytes"},"source_id":"1401.6131","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:3e31fd6480cfc9ca5e7c3a748c151e813327c061f7bf975c2eecde656b96fce7","sha256:7736cd47568caaaa58cabf799f3a4ec23e8aff1d0c5944d9fda5523174a83872"],"state_sha256":"baa5aa33766ee93bd5fdb23629482be82abf1608752e4624cddb46c2227b0cf8"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"nk3H2Ar4xYau5d+P5X8aJ6740G4dmVTrQJGttyUxG7ZG8qhw6Gjk5yiO5X4x3ahTp1JhDu87iNbXQz/pCJ4KBg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-27T16:22:07.140817Z","bundle_sha256":"7f2a2df2a580940e232ffa27f66a1bca9b76144ff41ee6073d4b403d70cb12bd"}}