{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2021:NC2BGDBYBT62UIXN4DAOH4BOBN","short_pith_number":"pith:NC2BGDBY","canonical_record":{"source":{"id":"2111.08191","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2021-11-16T02:17:49Z","cross_cats_sorted":["cs.SD","eess.AS"],"title_canon_sha256":"473c21bcbc94100625a8e88156344cb23d4840fab3a89078512b48dfe3f31d56","abstract_canon_sha256":"fb42f61924145e5e63f7f786d21791bbb712e26432b6bea88e0ab4bf09970333"},"schema_version":"1.0"},"canonical_sha256":"68b4130c380cfdaa22ede0c0e3f02e0b4885efa321987f907ad51718f7140a47","source":{"kind":"arxiv","id":"2111.08191","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2111.08191","created_at":"2026-07-05T04:35:57Z"},{"alias_kind":"arxiv_version","alias_value":"2111.08191v2","created_at":"2026-07-05T04:35:57Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2111.08191","created_at":"2026-07-05T04:35:57Z"},{"alias_kind":"pith_short_12","alias_value":"NC2BGDBYBT62","created_at":"2026-07-05T04:35:57Z"},{"alias_kind":"pith_short_16","alias_value":"NC2BGDBYBT62UIXN","created_at":"2026-07-05T04:35:57Z"},{"alias_kind":"pith_short_8","alias_value":"NC2BGDBY","created_at":"2026-07-05T04:35:57Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2021:NC2BGDBYBT62UIXN4DAOH4BOBN","target":"record","payload":{"canonical_record":{"source":{"id":"2111.08191","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2021-11-16T02:17:49Z","cross_cats_sorted":["cs.SD","eess.AS"],"title_canon_sha256":"473c21bcbc94100625a8e88156344cb23d4840fab3a89078512b48dfe3f31d56","abstract_canon_sha256":"fb42f61924145e5e63f7f786d21791bbb712e26432b6bea88e0ab4bf09970333"},"schema_version":"1.0"},"canonical_sha256":"68b4130c380cfdaa22ede0c0e3f02e0b4885efa321987f907ad51718f7140a47","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-07-05T04:35:57.578921Z","signature_b64":"p85YSRMl4/FV8u45ojUWW7AktGcinLRc4rPeAncI0S9VNBQLakMqcn1vPQdzMMCta6hBCuXaVEo+T7+rXnFPDQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"68b4130c380cfdaa22ede0c0e3f02e0b4885efa321987f907ad51718f7140a47","last_reissued_at":"2026-07-05T04:35:57.578422Z","signature_status":"signed_v1","first_computed_at":"2026-07-05T04:35:57.578422Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2111.08191","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-07-05T04:35:57Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"gWuip7OnwvqCT3hoHuB59aFw6m8hHKI9AxjnWqrPpvEV9Ia8qFWJaCavktJRlR/yMfl2q2Y5oKBR7vXC/qkcCw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-07-05T09:26:43.330574Z"},"content_sha256":"8b4260144289dc3ca95adfc784cd2eb1f202d0f0076c148a1558f8e4c3dd8808","schema_version":"1.0","event_id":"sha256:8b4260144289dc3ca95adfc784cd2eb1f202d0f0076c148a1558f8e4c3dd8808"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2021:NC2BGDBYBT62UIXN4DAOH4BOBN","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"CoCA-MDD: A Coupled Cross-Attention based Framework for Streaming Mispronunciation Detection and Diagnosis","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.SD","eess.AS"],"primary_cat":"cs.CL","authors_text":"Baohua Xu, Liqun Deng, Nianzu Zheng, Qun Liu, Wenyong Huang, Xiao Chen, Xin Jiang, Yasheng Wang, Yuanyuan Guo, Yu Ting Yeung","submitted_at":"2021-11-16T02:17:49Z","abstract_excerpt":"Mispronunciation detection and diagnosis (MDD) is a popular research focus in computer-aided pronunciation training (CAPT) systems. End-to-end (e2e) approaches are becoming dominant in MDD. However an e2e MDD model usually requires entire speech utterances as input context, which leads to significant time latency especially for long paragraphs. We propose a streaming e2e MDD model called CoCA-MDD. We utilize conv-transformer structure to encode input speech in a streaming manner. A coupled cross-attention (CoCA) mechanism is proposed to integrate frame-level acoustic features with encoded refe"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2111.08191","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2111.08191/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-07-05T04:35:57Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"v1D4Ts+dagkF5eY8k+hAcqw97kyGR58ce+VpbykjArm4bz0cRu87BLflTMr7IfBg+4jlIUYGhedlwYnpT+TIBg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-07-05T09:26:43.331048Z"},"content_sha256":"bce0c1ea78ceb9abe5201d0b60736dad00adad3ac94802aaa474d5a6e3e4746d","schema_version":"1.0","event_id":"sha256:bce0c1ea78ceb9abe5201d0b60736dad00adad3ac94802aaa474d5a6e3e4746d"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/NC2BGDBYBT62UIXN4DAOH4BOBN/bundle.json","state_url":"https://pith.science/pith/NC2BGDBYBT62UIXN4DAOH4BOBN/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/NC2BGDBYBT62UIXN4DAOH4BOBN/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-07-05T09:26:43Z","links":{"resolver":"https://pith.science/pith/NC2BGDBYBT62UIXN4DAOH4BOBN","bundle":"https://pith.science/pith/NC2BGDBYBT62UIXN4DAOH4BOBN/bundle.json","state":"https://pith.science/pith/NC2BGDBYBT62UIXN4DAOH4BOBN/state.json","well_known_bundle":"https://pith.science/.well-known/pith/NC2BGDBYBT62UIXN4DAOH4BOBN/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2021:NC2BGDBYBT62UIXN4DAOH4BOBN","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"fb42f61924145e5e63f7f786d21791bbb712e26432b6bea88e0ab4bf09970333","cross_cats_sorted":["cs.SD","eess.AS"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2021-11-16T02:17:49Z","title_canon_sha256":"473c21bcbc94100625a8e88156344cb23d4840fab3a89078512b48dfe3f31d56"},"schema_version":"1.0","source":{"id":"2111.08191","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2111.08191","created_at":"2026-07-05T04:35:57Z"},{"alias_kind":"arxiv_version","alias_value":"2111.08191v2","created_at":"2026-07-05T04:35:57Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2111.08191","created_at":"2026-07-05T04:35:57Z"},{"alias_kind":"pith_short_12","alias_value":"NC2BGDBYBT62","created_at":"2026-07-05T04:35:57Z"},{"alias_kind":"pith_short_16","alias_value":"NC2BGDBYBT62UIXN","created_at":"2026-07-05T04:35:57Z"},{"alias_kind":"pith_short_8","alias_value":"NC2BGDBY","created_at":"2026-07-05T04:35:57Z"}],"graph_snapshots":[{"event_id":"sha256:bce0c1ea78ceb9abe5201d0b60736dad00adad3ac94802aaa474d5a6e3e4746d","target":"graph","created_at":"2026-07-05T04:35:57Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2111.08191/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Mispronunciation detection and diagnosis (MDD) is a popular research focus in computer-aided pronunciation training (CAPT) systems. End-to-end (e2e) approaches are becoming dominant in MDD. However an e2e MDD model usually requires entire speech utterances as input context, which leads to significant time latency especially for long paragraphs. We propose a streaming e2e MDD model called CoCA-MDD. We utilize conv-transformer structure to encode input speech in a streaming manner. A coupled cross-attention (CoCA) mechanism is proposed to integrate frame-level acoustic features with encoded refe","authors_text":"Baohua Xu, Liqun Deng, Nianzu Zheng, Qun Liu, Wenyong Huang, Xiao Chen, Xin Jiang, Yasheng Wang, Yuanyuan Guo, Yu Ting Yeung","cross_cats":["cs.SD","eess.AS"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2021-11-16T02:17:49Z","title":"CoCA-MDD: A Coupled Cross-Attention based Framework for Streaming Mispronunciation Detection and Diagnosis"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2111.08191","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:8b4260144289dc3ca95adfc784cd2eb1f202d0f0076c148a1558f8e4c3dd8808","target":"record","created_at":"2026-07-05T04:35:57Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"fb42f61924145e5e63f7f786d21791bbb712e26432b6bea88e0ab4bf09970333","cross_cats_sorted":["cs.SD","eess.AS"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2021-11-16T02:17:49Z","title_canon_sha256":"473c21bcbc94100625a8e88156344cb23d4840fab3a89078512b48dfe3f31d56"},"schema_version":"1.0","source":{"id":"2111.08191","kind":"arxiv","version":2}},"canonical_sha256":"68b4130c380cfdaa22ede0c0e3f02e0b4885efa321987f907ad51718f7140a47","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"68b4130c380cfdaa22ede0c0e3f02e0b4885efa321987f907ad51718f7140a47","first_computed_at":"2026-07-05T04:35:57.578422Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-07-05T04:35:57.578422Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"p85YSRMl4/FV8u45ojUWW7AktGcinLRc4rPeAncI0S9VNBQLakMqcn1vPQdzMMCta6hBCuXaVEo+T7+rXnFPDQ==","signature_status":"signed_v1","signed_at":"2026-07-05T04:35:57.578921Z","signed_message":"canonical_sha256_bytes"},"source_id":"2111.08191","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:8b4260144289dc3ca95adfc784cd2eb1f202d0f0076c148a1558f8e4c3dd8808","sha256:bce0c1ea78ceb9abe5201d0b60736dad00adad3ac94802aaa474d5a6e3e4746d"],"state_sha256":"c2fef4c3254553bb0fac71faadebc5140e221f4174aa9bb43295bdd4cfcd4f4f"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"BzoEddTGOw4rDUN+Gs1liyxk42peTPDghPfZzY+6wJlyb/9EgQN4SlTiRVPcG8whErim1iw0qRF++A5sAmijBw==","signed_message":"bundle_sha256_bytes","signed_at":"2026-07-05T09:26:43.333546Z","bundle_sha256":"7210c67eef56d339314abec9bcf0622dec846351a462b102bb8a630aa2838f07"}}