{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2018:K6AD4FY7IBGT3WIISI6JJFAPAG","short_pith_number":"pith:K6AD4FY7","schema_version":"1.0","canonical_sha256":"57803e171f404d3dd908923c94940f018cb56eb6f91ace3a809c9b5841d4bc17","source":{"kind":"arxiv","id":"1805.07467","version":2},"attestation_state":"computed","paper":{"title":"Unsupervised Cross-Modal Alignment of Speech and Text Embedding Spaces","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.SD","eess.AS"],"primary_cat":"cs.CL","authors_text":"James Glass, Schrasing Tong, Wei-Hung Weng, Yu-An Chung","submitted_at":"2018-05-18T22:59:18Z","abstract_excerpt":"Recent research has shown that word embedding spaces learned from text corpora of different languages can be aligned without any parallel data supervision. Inspired by the success in unsupervised cross-lingual word embeddings, in this paper we target learning a cross-modal alignment between the embedding spaces of speech and text learned from corpora of their respective modalities in an unsupervised fashion. The proposed framework learns the individual speech and text embedding spaces, and attempts to align the two spaces via adversarial training, followed by a refinement procedure. We show ho"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1805.07467","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2018-05-18T22:59:18Z","cross_cats_sorted":["cs.SD","eess.AS"],"title_canon_sha256":"75b721de32e5eca50674951450f90f966645f9a0824eca30b493ca4d7d3c5959","abstract_canon_sha256":"4b30878d32a0d93d2545bc6ae5da56fec2d9a84c92076fba5fbf5c30ecaec2af"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:05:14.052654Z","signature_b64":"3ksKabTOBD28BJcyTNbELr/weLrGTvjDk8CrEBlZGM4ny/bvoQH/SXBXqpkRPLoGghdG2W+4Q7uH8PiJQxsrDA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"57803e171f404d3dd908923c94940f018cb56eb6f91ace3a809c9b5841d4bc17","last_reissued_at":"2026-05-18T00:05:14.051940Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:05:14.051940Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Unsupervised Cross-Modal Alignment of Speech and Text Embedding Spaces","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.SD","eess.AS"],"primary_cat":"cs.CL","authors_text":"James Glass, Schrasing Tong, Wei-Hung Weng, Yu-An Chung","submitted_at":"2018-05-18T22:59:18Z","abstract_excerpt":"Recent research has shown that word embedding spaces learned from text corpora of different languages can be aligned without any parallel data supervision. Inspired by the success in unsupervised cross-lingual word embeddings, in this paper we target learning a cross-modal alignment between the embedding spaces of speech and text learned from corpora of their respective modalities in an unsupervised fashion. The proposed framework learns the individual speech and text embedding spaces, and attempts to align the two spaces via adversarial training, followed by a refinement procedure. We show ho"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1805.07467","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1805.07467","created_at":"2026-05-18T00:05:14.052040+00:00"},{"alias_kind":"arxiv_version","alias_value":"1805.07467v2","created_at":"2026-05-18T00:05:14.052040+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1805.07467","created_at":"2026-05-18T00:05:14.052040+00:00"},{"alias_kind":"pith_short_12","alias_value":"K6AD4FY7IBGT","created_at":"2026-05-18T12:32:33.847187+00:00"},{"alias_kind":"pith_short_16","alias_value":"K6AD4FY7IBGT3WII","created_at":"2026-05-18T12:32:33.847187+00:00"},{"alias_kind":"pith_short_8","alias_value":"K6AD4FY7","created_at":"2026-05-18T12:32:33.847187+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/K6AD4FY7IBGT3WIISI6JJFAPAG","json":"https://pith.science/pith/K6AD4FY7IBGT3WIISI6JJFAPAG.json","graph_json":"https://pith.science/api/pith-number/K6AD4FY7IBGT3WIISI6JJFAPAG/graph.json","events_json":"https://pith.science/api/pith-number/K6AD4FY7IBGT3WIISI6JJFAPAG/events.json","paper":"https://pith.science/paper/K6AD4FY7"},"agent_actions":{"view_html":"https://pith.science/pith/K6AD4FY7IBGT3WIISI6JJFAPAG","download_json":"https://pith.science/pith/K6AD4FY7IBGT3WIISI6JJFAPAG.json","view_paper":"https://pith.science/paper/K6AD4FY7","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1805.07467&json=true","fetch_graph":"https://pith.science/api/pith-number/K6AD4FY7IBGT3WIISI6JJFAPAG/graph.json","fetch_events":"https://pith.science/api/pith-number/K6AD4FY7IBGT3WIISI6JJFAPAG/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/K6AD4FY7IBGT3WIISI6JJFAPAG/action/timestamp_anchor","attest_storage":"https://pith.science/pith/K6AD4FY7IBGT3WIISI6JJFAPAG/action/storage_attestation","attest_author":"https://pith.science/pith/K6AD4FY7IBGT3WIISI6JJFAPAG/action/author_attestation","sign_citation":"https://pith.science/pith/K6AD4FY7IBGT3WIISI6JJFAPAG/action/citation_signature","submit_replication":"https://pith.science/pith/K6AD4FY7IBGT3WIISI6JJFAPAG/action/replication_record"}},"created_at":"2026-05-18T00:05:14.052040+00:00","updated_at":"2026-05-18T00:05:14.052040+00:00"}