{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2017:ERNGIL7ZEK7D27LBJP2XMWGM2Q","short_pith_number":"pith:ERNGIL7Z","schema_version":"1.0","canonical_sha256":"245a642ff922be3d7d614bf57658ccd4019cb9ebf7e1f1f5d9a7b1983ced36dc","source":{"kind":"arxiv","id":"1702.03859","version":1},"attestation_state":"computed","paper":{"title":"Offline bilingual word vectors, orthogonal transformations and the inverted softmax","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","cs.IR"],"primary_cat":"cs.CL","authors_text":"David H. P. Turban, Nils Y. Hammerla, Samuel L. Smith, Steven Hamblin","submitted_at":"2017-02-13T16:31:06Z","abstract_excerpt":"Usually bilingual word vectors are trained \"online\". Mikolov et al. showed they can also be found \"offline\", whereby two pre-trained embeddings are aligned with a linear transformation, using dictionaries compiled from expert knowledge. In this work, we prove that the linear transformation between two spaces should be orthogonal. This transformation can be obtained using the singular value decomposition. We introduce a novel \"inverted softmax\" for identifying translation pairs, with which we improve the precision @1 of Mikolov's original mapping from 34% to 43%, when translating a test set com"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1702.03859","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2017-02-13T16:31:06Z","cross_cats_sorted":["cs.AI","cs.IR"],"title_canon_sha256":"276499ac859a61c898dc30b303257300c7e3a24b5835fbda909fb031f064e71f","abstract_canon_sha256":"bf77f0e591dfe8fabe28cccf4ee1d3365b93cc3c01dd3eee0fdfeb7f2039607f"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:50:52.859614Z","signature_b64":"ghtGpQPZl/Q7GuYtfmub/rZTiVScO3kL81v28fo1lfGb8QFRhTaDPf4Dhi/V7pdBQyxm+QWCcuJWoeYFnFraAQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"245a642ff922be3d7d614bf57658ccd4019cb9ebf7e1f1f5d9a7b1983ced36dc","last_reissued_at":"2026-05-18T00:50:52.859191Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:50:52.859191Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Offline bilingual word vectors, orthogonal transformations and the inverted softmax","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","cs.IR"],"primary_cat":"cs.CL","authors_text":"David H. P. Turban, Nils Y. Hammerla, Samuel L. Smith, Steven Hamblin","submitted_at":"2017-02-13T16:31:06Z","abstract_excerpt":"Usually bilingual word vectors are trained \"online\". Mikolov et al. showed they can also be found \"offline\", whereby two pre-trained embeddings are aligned with a linear transformation, using dictionaries compiled from expert knowledge. In this work, we prove that the linear transformation between two spaces should be orthogonal. This transformation can be obtained using the singular value decomposition. We introduce a novel \"inverted softmax\" for identifying translation pairs, with which we improve the precision @1 of Mikolov's original mapping from 34% to 43%, when translating a test set com"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1702.03859","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1702.03859","created_at":"2026-05-18T00:50:52.859250+00:00"},{"alias_kind":"arxiv_version","alias_value":"1702.03859v1","created_at":"2026-05-18T00:50:52.859250+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1702.03859","created_at":"2026-05-18T00:50:52.859250+00:00"},{"alias_kind":"pith_short_12","alias_value":"ERNGIL7ZEK7D","created_at":"2026-05-18T12:31:12.930513+00:00"},{"alias_kind":"pith_short_16","alias_value":"ERNGIL7ZEK7D27LB","created_at":"2026-05-18T12:31:12.930513+00:00"},{"alias_kind":"pith_short_8","alias_value":"ERNGIL7Z","created_at":"2026-05-18T12:31:12.930513+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":3,"internal_anchor_count":2,"sample":[{"citing_arxiv_id":"1906.09543","citing_title":"Cross-lingual Data Transformation and Combination for Text Classification","ref_index":13,"is_internal_anchor":true},{"citing_arxiv_id":"1907.00544","citing_title":"Unsupervised Adversarial Graph Alignment with Graph Embedding","ref_index":34,"is_internal_anchor":true},{"citing_arxiv_id":"2604.14815","citing_title":"Domain Fine-Tuning FinBERT on Finnish Histopathological Reports: Train-Time Signals and Downstream Correlations","ref_index":32,"is_internal_anchor":false}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/ERNGIL7ZEK7D27LBJP2XMWGM2Q","json":"https://pith.science/pith/ERNGIL7ZEK7D27LBJP2XMWGM2Q.json","graph_json":"https://pith.science/api/pith-number/ERNGIL7ZEK7D27LBJP2XMWGM2Q/graph.json","events_json":"https://pith.science/api/pith-number/ERNGIL7ZEK7D27LBJP2XMWGM2Q/events.json","paper":"https://pith.science/paper/ERNGIL7Z"},"agent_actions":{"view_html":"https://pith.science/pith/ERNGIL7ZEK7D27LBJP2XMWGM2Q","download_json":"https://pith.science/pith/ERNGIL7ZEK7D27LBJP2XMWGM2Q.json","view_paper":"https://pith.science/paper/ERNGIL7Z","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1702.03859&json=true","fetch_graph":"https://pith.science/api/pith-number/ERNGIL7ZEK7D27LBJP2XMWGM2Q/graph.json","fetch_events":"https://pith.science/api/pith-number/ERNGIL7ZEK7D27LBJP2XMWGM2Q/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/ERNGIL7ZEK7D27LBJP2XMWGM2Q/action/timestamp_anchor","attest_storage":"https://pith.science/pith/ERNGIL7ZEK7D27LBJP2XMWGM2Q/action/storage_attestation","attest_author":"https://pith.science/pith/ERNGIL7ZEK7D27LBJP2XMWGM2Q/action/author_attestation","sign_citation":"https://pith.science/pith/ERNGIL7ZEK7D27LBJP2XMWGM2Q/action/citation_signature","submit_replication":"https://pith.science/pith/ERNGIL7ZEK7D27LBJP2XMWGM2Q/action/replication_record"}},"created_at":"2026-05-18T00:50:52.859250+00:00","updated_at":"2026-05-18T00:50:52.859250+00:00"}