{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2017:M74LRIM5LR5W64MQJV2RD4KGWR","short_pith_number":"pith:M74LRIM5","canonical_record":{"source":{"id":"1711.07274","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2017-11-20T12:07:22Z","cross_cats_sorted":["cs.SD","eess.AS","stat.ML"],"title_canon_sha256":"32e210e552143cfeaf433e49395bcb892b0dffeb26ac35956fea4f00bda2b70d","abstract_canon_sha256":"01d0e71352e7f5700850a6b9dc57b23e28f66a5297bcddc27ca937726ee39055"},"schema_version":"1.0"},"canonical_sha256":"67f8b8a19d5c7b6f71904d7511f146b47ca99f685ce339668cf78df5ba37075a","source":{"kind":"arxiv","id":"1711.07274","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1711.07274","created_at":"2026-05-18T00:12:48Z"},{"alias_kind":"arxiv_version","alias_value":"1711.07274v2","created_at":"2026-05-18T00:12:48Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1711.07274","created_at":"2026-05-18T00:12:48Z"},{"alias_kind":"pith_short_12","alias_value":"M74LRIM5LR5W","created_at":"2026-05-18T12:31:31Z"},{"alias_kind":"pith_short_16","alias_value":"M74LRIM5LR5W64MQ","created_at":"2026-05-18T12:31:31Z"},{"alias_kind":"pith_short_8","alias_value":"M74LRIM5","created_at":"2026-05-18T12:31:31Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2017:M74LRIM5LR5W64MQJV2RD4KGWR","target":"record","payload":{"canonical_record":{"source":{"id":"1711.07274","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2017-11-20T12:07:22Z","cross_cats_sorted":["cs.SD","eess.AS","stat.ML"],"title_canon_sha256":"32e210e552143cfeaf433e49395bcb892b0dffeb26ac35956fea4f00bda2b70d","abstract_canon_sha256":"01d0e71352e7f5700850a6b9dc57b23e28f66a5297bcddc27ca937726ee39055"},"schema_version":"1.0"},"canonical_sha256":"67f8b8a19d5c7b6f71904d7511f146b47ca99f685ce339668cf78df5ba37075a","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:12:48.600090Z","signature_b64":"YRKuq+GXfbEih9RDKQjCnB3NzPBzAjXH3fS+dB1JGCRU1Wv3qRfmDoXo/KG1WU+60J6PRV3p5+/KKMZQLhIwCA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"67f8b8a19d5c7b6f71904d7511f146b47ca99f685ce339668cf78df5ba37075a","last_reissued_at":"2026-05-18T00:12:48.599378Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:12:48.599378Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1711.07274","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:12:48Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"VFYHrxh/QKv6CCD/2UpxFu9KnszYQIcxxcWft2bYq2L+0E34rt9IUwPKbE/4bQuSWaPE0Y4GMgrTtTv/bNnnDg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-29T17:26:39.567819Z"},"content_sha256":"047cb99711588146e83d5e017d4e50e56d995295a4dff09a28dc43d07a91ad0a","schema_version":"1.0","event_id":"sha256:047cb99711588146e83d5e017d4e50e56d995295a4dff09a28dc43d07a91ad0a"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2017:M74LRIM5LR5W64MQJV2RD4KGWR","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Speech recognition for medical conversations","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.SD","eess.AS","stat.ML"],"primary_cat":"cs.CL","authors_text":"Ananth Sankar, Anjuli Kannan, Anshuman Tripathi, Chris Co, Chung-Cheng Chiu, Diana Jaunzeikare, Hasim Sak, Justin Tansuwan, Katherine Chou, Nathan Wan, Navdeep Jaitly, Patrick Nguyen, Xuedong Zhang, Yonghui Wu","submitted_at":"2017-11-20T12:07:22Z","abstract_excerpt":"In this work we explored building automatic speech recognition models for transcribing doctor patient conversation. We collected a large scale dataset of clinical conversations ($14,000$ hr), designed the task to represent the real word scenario, and explored several alignment approaches to iteratively improve data quality. We explored both CTC and LAS systems for building speech recognition models. The LAS was more resilient to noisy data and CTC required more data clean up. A detailed analysis is provided for understanding the performance for clinical tasks. Our analysis showed the speech re"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1711.07274","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:12:48Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"ekylH/sBcZFUpF1IcLsanC2P7pm3oTO1FfXKGdvEvhpK6azU831e60oJX1iIBr3uuczwBioj+jDtbMWlSr53DA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-29T17:26:39.568570Z"},"content_sha256":"ba2f5727c481983ea9af0a750c3c41e71b5da9c704fbcf01f204ed0a3c2e022f","schema_version":"1.0","event_id":"sha256:ba2f5727c481983ea9af0a750c3c41e71b5da9c704fbcf01f204ed0a3c2e022f"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/M74LRIM5LR5W64MQJV2RD4KGWR/bundle.json","state_url":"https://pith.science/pith/M74LRIM5LR5W64MQJV2RD4KGWR/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/M74LRIM5LR5W64MQJV2RD4KGWR/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-29T17:26:39Z","links":{"resolver":"https://pith.science/pith/M74LRIM5LR5W64MQJV2RD4KGWR","bundle":"https://pith.science/pith/M74LRIM5LR5W64MQJV2RD4KGWR/bundle.json","state":"https://pith.science/pith/M74LRIM5LR5W64MQJV2RD4KGWR/state.json","well_known_bundle":"https://pith.science/.well-known/pith/M74LRIM5LR5W64MQJV2RD4KGWR/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2017:M74LRIM5LR5W64MQJV2RD4KGWR","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"01d0e71352e7f5700850a6b9dc57b23e28f66a5297bcddc27ca937726ee39055","cross_cats_sorted":["cs.SD","eess.AS","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2017-11-20T12:07:22Z","title_canon_sha256":"32e210e552143cfeaf433e49395bcb892b0dffeb26ac35956fea4f00bda2b70d"},"schema_version":"1.0","source":{"id":"1711.07274","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1711.07274","created_at":"2026-05-18T00:12:48Z"},{"alias_kind":"arxiv_version","alias_value":"1711.07274v2","created_at":"2026-05-18T00:12:48Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1711.07274","created_at":"2026-05-18T00:12:48Z"},{"alias_kind":"pith_short_12","alias_value":"M74LRIM5LR5W","created_at":"2026-05-18T12:31:31Z"},{"alias_kind":"pith_short_16","alias_value":"M74LRIM5LR5W64MQ","created_at":"2026-05-18T12:31:31Z"},{"alias_kind":"pith_short_8","alias_value":"M74LRIM5","created_at":"2026-05-18T12:31:31Z"}],"graph_snapshots":[{"event_id":"sha256:ba2f5727c481983ea9af0a750c3c41e71b5da9c704fbcf01f204ed0a3c2e022f","target":"graph","created_at":"2026-05-18T00:12:48Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"In this work we explored building automatic speech recognition models for transcribing doctor patient conversation. We collected a large scale dataset of clinical conversations ($14,000$ hr), designed the task to represent the real word scenario, and explored several alignment approaches to iteratively improve data quality. We explored both CTC and LAS systems for building speech recognition models. The LAS was more resilient to noisy data and CTC required more data clean up. A detailed analysis is provided for understanding the performance for clinical tasks. Our analysis showed the speech re","authors_text":"Ananth Sankar, Anjuli Kannan, Anshuman Tripathi, Chris Co, Chung-Cheng Chiu, Diana Jaunzeikare, Hasim Sak, Justin Tansuwan, Katherine Chou, Nathan Wan, Navdeep Jaitly, Patrick Nguyen, Xuedong Zhang, Yonghui Wu","cross_cats":["cs.SD","eess.AS","stat.ML"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2017-11-20T12:07:22Z","title":"Speech recognition for medical conversations"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1711.07274","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:047cb99711588146e83d5e017d4e50e56d995295a4dff09a28dc43d07a91ad0a","target":"record","created_at":"2026-05-18T00:12:48Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"01d0e71352e7f5700850a6b9dc57b23e28f66a5297bcddc27ca937726ee39055","cross_cats_sorted":["cs.SD","eess.AS","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2017-11-20T12:07:22Z","title_canon_sha256":"32e210e552143cfeaf433e49395bcb892b0dffeb26ac35956fea4f00bda2b70d"},"schema_version":"1.0","source":{"id":"1711.07274","kind":"arxiv","version":2}},"canonical_sha256":"67f8b8a19d5c7b6f71904d7511f146b47ca99f685ce339668cf78df5ba37075a","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"67f8b8a19d5c7b6f71904d7511f146b47ca99f685ce339668cf78df5ba37075a","first_computed_at":"2026-05-18T00:12:48.599378Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:12:48.599378Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"YRKuq+GXfbEih9RDKQjCnB3NzPBzAjXH3fS+dB1JGCRU1Wv3qRfmDoXo/KG1WU+60J6PRV3p5+/KKMZQLhIwCA==","signature_status":"signed_v1","signed_at":"2026-05-18T00:12:48.600090Z","signed_message":"canonical_sha256_bytes"},"source_id":"1711.07274","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:047cb99711588146e83d5e017d4e50e56d995295a4dff09a28dc43d07a91ad0a","sha256:ba2f5727c481983ea9af0a750c3c41e71b5da9c704fbcf01f204ed0a3c2e022f"],"state_sha256":"77cc6b2a0f409a5fab2125c14f8f93e410b0718bb0d4753cdfba7fbf205d51dc"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"YRrSJ8/WgwPmtvRQ1AsTRWo2PMjeYL0qNn8+UuhTllncoJTl0TX7RVwuUOvrf0311uP4sfq/ExYo6T5srpTRAw==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-29T17:26:39.570985Z","bundle_sha256":"beb75ff914799a0e4c13de6be412a3766036d13432e792ec09fa22955509e581"}}