{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2016:7HZN7IQY3OYIG67UFRBYQRAGSF","short_pith_number":"pith:7HZN7IQY","schema_version":"1.0","canonical_sha256":"f9f2dfa218dbb0837bf42c43884406915318ec636010b304146d66366325ec86","source":{"kind":"arxiv","id":"1606.06950","version":2},"attestation_state":"computed","paper":{"title":"A segmental framework for fully-unsupervised large-vocabulary speech recognition","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.LG"],"primary_cat":"cs.CL","authors_text":"Aren Jansen, Herman Kamper, Sharon Goldwater","submitted_at":"2016-06-22T13:51:57Z","abstract_excerpt":"Zero-resource speech technology is a growing research area that aims to develop methods for speech processing in the absence of transcriptions, lexicons, or language modelling text. Early term discovery systems focused on identifying isolated recurring patterns in a corpus, while more recent full-coverage systems attempt to completely segment and cluster the audio into word-like units---effectively performing unsupervised speech recognition. This article presents the first attempt we are aware of to apply such a system to large-vocabulary multi-speaker data. Our system uses a Bayesian modellin"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1606.06950","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2016-06-22T13:51:57Z","cross_cats_sorted":["cs.LG"],"title_canon_sha256":"5fa52ba38905429afafa1020262c7e3d5adaf7c03835564d4641e4be2dd74b24","abstract_canon_sha256":"73101ac32806d084755df046ad0b43fd6880a8e69731b49d55b8f9658700d1f2"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:35:05.350067Z","signature_b64":"WLuai6QoHyIzRYs3OtKcMtvx3Ad+N71qgvnPgh8CrZUxtZ28k051oZ6SQvYnuRKjAJmuA/XqdRdul78gJFirDQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"f9f2dfa218dbb0837bf42c43884406915318ec636010b304146d66366325ec86","last_reissued_at":"2026-05-18T00:35:05.349399Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:35:05.349399Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"A segmental framework for fully-unsupervised large-vocabulary speech recognition","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.LG"],"primary_cat":"cs.CL","authors_text":"Aren Jansen, Herman Kamper, Sharon Goldwater","submitted_at":"2016-06-22T13:51:57Z","abstract_excerpt":"Zero-resource speech technology is a growing research area that aims to develop methods for speech processing in the absence of transcriptions, lexicons, or language modelling text. Early term discovery systems focused on identifying isolated recurring patterns in a corpus, while more recent full-coverage systems attempt to completely segment and cluster the audio into word-like units---effectively performing unsupervised speech recognition. This article presents the first attempt we are aware of to apply such a system to large-vocabulary multi-speaker data. Our system uses a Bayesian modellin"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1606.06950","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1606.06950","created_at":"2026-05-18T00:35:05.349530+00:00"},{"alias_kind":"arxiv_version","alias_value":"1606.06950v2","created_at":"2026-05-18T00:35:05.349530+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1606.06950","created_at":"2026-05-18T00:35:05.349530+00:00"},{"alias_kind":"pith_short_12","alias_value":"7HZN7IQY3OYI","created_at":"2026-05-18T12:30:04.600751+00:00"},{"alias_kind":"pith_short_16","alias_value":"7HZN7IQY3OYIG67U","created_at":"2026-05-18T12:30:04.600751+00:00"},{"alias_kind":"pith_short_8","alias_value":"7HZN7IQY","created_at":"2026-05-18T12:30:04.600751+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/7HZN7IQY3OYIG67UFRBYQRAGSF","json":"https://pith.science/pith/7HZN7IQY3OYIG67UFRBYQRAGSF.json","graph_json":"https://pith.science/api/pith-number/7HZN7IQY3OYIG67UFRBYQRAGSF/graph.json","events_json":"https://pith.science/api/pith-number/7HZN7IQY3OYIG67UFRBYQRAGSF/events.json","paper":"https://pith.science/paper/7HZN7IQY"},"agent_actions":{"view_html":"https://pith.science/pith/7HZN7IQY3OYIG67UFRBYQRAGSF","download_json":"https://pith.science/pith/7HZN7IQY3OYIG67UFRBYQRAGSF.json","view_paper":"https://pith.science/paper/7HZN7IQY","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1606.06950&json=true","fetch_graph":"https://pith.science/api/pith-number/7HZN7IQY3OYIG67UFRBYQRAGSF/graph.json","fetch_events":"https://pith.science/api/pith-number/7HZN7IQY3OYIG67UFRBYQRAGSF/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/7HZN7IQY3OYIG67UFRBYQRAGSF/action/timestamp_anchor","attest_storage":"https://pith.science/pith/7HZN7IQY3OYIG67UFRBYQRAGSF/action/storage_attestation","attest_author":"https://pith.science/pith/7HZN7IQY3OYIG67UFRBYQRAGSF/action/author_attestation","sign_citation":"https://pith.science/pith/7HZN7IQY3OYIG67UFRBYQRAGSF/action/citation_signature","submit_replication":"https://pith.science/pith/7HZN7IQY3OYIG67UFRBYQRAGSF/action/replication_record"}},"created_at":"2026-05-18T00:35:05.349530+00:00","updated_at":"2026-05-18T00:35:05.349530+00:00"}