{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2017:FFNFWROJG2ORV7Z6DAJNRW6LKX","short_pith_number":"pith:FFNFWROJ","schema_version":"1.0","canonical_sha256":"295a5b45c9369d1aff3e1812d8dbcb55c1b8d091742b08ea99476caf4ca8e822","source":{"kind":"arxiv","id":"1705.09724","version":1},"attestation_state":"computed","paper":{"title":"Semi-Supervised Model Training for Unbounded Conversational Speech Recognition","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Iroro Orife, Jason Flaks, Morten Pedersen, Shane Walker","submitted_at":"2017-05-26T21:10:15Z","abstract_excerpt":"For conversational large-vocabulary continuous speech recognition (LVCSR) tasks, up to about two thousand hours of audio is commonly used to train state of the art models. Collection of labeled conversational audio however, is prohibitively expensive, laborious and error-prone. Furthermore, academic corpora like Fisher English (2004) or Switchboard (1992) are inadequate to train models with sufficient accuracy in the unbounded space of conversational speech. These corpora are also timeworn due to dated acoustic telephony features and the rapid advancement of colloquial vocabulary and idiomatic"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1705.09724","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2017-05-26T21:10:15Z","cross_cats_sorted":[],"title_canon_sha256":"72e9d4029be547ab4a6a0c582cab4d4257375344e4f81771ea3f5a4a4e35f1a5","abstract_canon_sha256":"f923d44e31eae69ffb95b42661ff015348543e22ead4100d7578b914547214df"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:43:34.633005Z","signature_b64":"e8ffkwi84lwh+SO46nukb16MkJWQX5wTadBkkNvx10Q3+pABe+WSQL6gBGnDh4d4sqVy8QkbtxVLD0eNnODgCw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"295a5b45c9369d1aff3e1812d8dbcb55c1b8d091742b08ea99476caf4ca8e822","last_reissued_at":"2026-05-18T00:43:34.632509Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:43:34.632509Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Semi-Supervised Model Training for Unbounded Conversational Speech Recognition","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Iroro Orife, Jason Flaks, Morten Pedersen, Shane Walker","submitted_at":"2017-05-26T21:10:15Z","abstract_excerpt":"For conversational large-vocabulary continuous speech recognition (LVCSR) tasks, up to about two thousand hours of audio is commonly used to train state of the art models. Collection of labeled conversational audio however, is prohibitively expensive, laborious and error-prone. Furthermore, academic corpora like Fisher English (2004) or Switchboard (1992) are inadequate to train models with sufficient accuracy in the unbounded space of conversational speech. These corpora are also timeworn due to dated acoustic telephony features and the rapid advancement of colloquial vocabulary and idiomatic"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1705.09724","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1705.09724","created_at":"2026-05-18T00:43:34.632586+00:00"},{"alias_kind":"arxiv_version","alias_value":"1705.09724v1","created_at":"2026-05-18T00:43:34.632586+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1705.09724","created_at":"2026-05-18T00:43:34.632586+00:00"},{"alias_kind":"pith_short_12","alias_value":"FFNFWROJG2OR","created_at":"2026-05-18T12:31:15.632608+00:00"},{"alias_kind":"pith_short_16","alias_value":"FFNFWROJG2ORV7Z6","created_at":"2026-05-18T12:31:15.632608+00:00"},{"alias_kind":"pith_short_8","alias_value":"FFNFWROJ","created_at":"2026-05-18T12:31:15.632608+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":1,"internal_anchor_count":1,"sample":[{"citing_arxiv_id":"1906.11521","citing_title":"Lattice-Based Unsupervised Test-Time Adaptation of Neural Network Acoustic Models","ref_index":19,"is_internal_anchor":true}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/FFNFWROJG2ORV7Z6DAJNRW6LKX","json":"https://pith.science/pith/FFNFWROJG2ORV7Z6DAJNRW6LKX.json","graph_json":"https://pith.science/api/pith-number/FFNFWROJG2ORV7Z6DAJNRW6LKX/graph.json","events_json":"https://pith.science/api/pith-number/FFNFWROJG2ORV7Z6DAJNRW6LKX/events.json","paper":"https://pith.science/paper/FFNFWROJ"},"agent_actions":{"view_html":"https://pith.science/pith/FFNFWROJG2ORV7Z6DAJNRW6LKX","download_json":"https://pith.science/pith/FFNFWROJG2ORV7Z6DAJNRW6LKX.json","view_paper":"https://pith.science/paper/FFNFWROJ","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1705.09724&json=true","fetch_graph":"https://pith.science/api/pith-number/FFNFWROJG2ORV7Z6DAJNRW6LKX/graph.json","fetch_events":"https://pith.science/api/pith-number/FFNFWROJG2ORV7Z6DAJNRW6LKX/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/FFNFWROJG2ORV7Z6DAJNRW6LKX/action/timestamp_anchor","attest_storage":"https://pith.science/pith/FFNFWROJG2ORV7Z6DAJNRW6LKX/action/storage_attestation","attest_author":"https://pith.science/pith/FFNFWROJG2ORV7Z6DAJNRW6LKX/action/author_attestation","sign_citation":"https://pith.science/pith/FFNFWROJG2ORV7Z6DAJNRW6LKX/action/citation_signature","submit_replication":"https://pith.science/pith/FFNFWROJG2ORV7Z6DAJNRW6LKX/action/replication_record"}},"created_at":"2026-05-18T00:43:34.632586+00:00","updated_at":"2026-05-18T00:43:34.632586+00:00"}