{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2015:AKF5DVTLF76EHSV7UQQIMGX22B","short_pith_number":"pith:AKF5DVTL","canonical_record":{"source":{"id":"1507.05523","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2015-07-20T15:07:53Z","cross_cats_sorted":[],"title_canon_sha256":"b341892ea803cb57681f1d092dd783637da8b48245e4d93f473765fb37e5e0ce","abstract_canon_sha256":"2de8ee52d0dca9d43de3c3bbfb589b72c0824889ae9a7c37e941308df19498a0"},"schema_version":"1.0"},"canonical_sha256":"028bd1d66b2ffc43cabfa420861afad0543f3502f4e3d5172134f1d194a0d3b6","source":{"kind":"arxiv","id":"1507.05523","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1507.05523","created_at":"2026-05-18T01:36:36Z"},{"alias_kind":"arxiv_version","alias_value":"1507.05523v1","created_at":"2026-05-18T01:36:36Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1507.05523","created_at":"2026-05-18T01:36:36Z"},{"alias_kind":"pith_short_12","alias_value":"AKF5DVTLF76E","created_at":"2026-05-18T12:29:10Z"},{"alias_kind":"pith_short_16","alias_value":"AKF5DVTLF76EHSV7","created_at":"2026-05-18T12:29:10Z"},{"alias_kind":"pith_short_8","alias_value":"AKF5DVTL","created_at":"2026-05-18T12:29:10Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2015:AKF5DVTLF76EHSV7UQQIMGX22B","target":"record","payload":{"canonical_record":{"source":{"id":"1507.05523","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2015-07-20T15:07:53Z","cross_cats_sorted":[],"title_canon_sha256":"b341892ea803cb57681f1d092dd783637da8b48245e4d93f473765fb37e5e0ce","abstract_canon_sha256":"2de8ee52d0dca9d43de3c3bbfb589b72c0824889ae9a7c37e941308df19498a0"},"schema_version":"1.0"},"canonical_sha256":"028bd1d66b2ffc43cabfa420861afad0543f3502f4e3d5172134f1d194a0d3b6","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T01:36:36.598126Z","signature_b64":"JNQEclmIZKUJRsbmTebcd5HW1VLWXM0P8xwJDyN2rrYIFuY48WKTh6JtmaKPorIUMIEcYEk+QbU+j9ti3rajAg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"028bd1d66b2ffc43cabfa420861afad0543f3502f4e3d5172134f1d194a0d3b6","last_reissued_at":"2026-05-18T01:36:36.597623Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T01:36:36.597623Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1507.05523","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T01:36:36Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"YLbl5KIwGsTUSQnRcXQbsjQe/h6mgrNxAsUcJ3Eqtil4dDauXRkjc8pZ32McMmrGju1biu7loZ2hTwyG5v0UCw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-28T18:14:13.827849Z"},"content_sha256":"90a4d1395247c9713675b1471d668fe3ef94db30f189e3ad488a99038a5c581b","schema_version":"1.0","event_id":"sha256:90a4d1395247c9713675b1471d668fe3ef94db30f189e3ad488a99038a5c581b"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2015:AKF5DVTLF76EHSV7UQQIMGX22B","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"How to Generate a Good Word Embedding?","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Jun Zhao, Kang Liu, Liheng Xu, Siwei Lai","submitted_at":"2015-07-20T15:07:53Z","abstract_excerpt":"We analyze three critical components of word embedding training: the model, the corpus, and the training parameters. We systematize existing neural-network-based word embedding algorithms and compare them using the same corpus. We evaluate each word embedding in three ways: analyzing its semantic properties, using it as a feature for supervised tasks and using it to initialize neural networks. We also provide several simple guidelines for training word embeddings. First, we discover that corpus domain is more important than corpus size. We recommend choosing a corpus in a suitable domain for t"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1507.05523","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T01:36:36Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"g2SF6488yE554BRM1NudfP0i+loHeJ8mtTMXu2R5leXK7Cng8iZ0LXdVy3MG0M0PBGoDgBz1v9J8Xh2b3zPeAg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-28T18:14:13.828206Z"},"content_sha256":"7ea613e5bb057b6330cbc3327c2bb32a27125791134930b1f1cafa410198189e","schema_version":"1.0","event_id":"sha256:7ea613e5bb057b6330cbc3327c2bb32a27125791134930b1f1cafa410198189e"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/AKF5DVTLF76EHSV7UQQIMGX22B/bundle.json","state_url":"https://pith.science/pith/AKF5DVTLF76EHSV7UQQIMGX22B/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/AKF5DVTLF76EHSV7UQQIMGX22B/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-28T18:14:13Z","links":{"resolver":"https://pith.science/pith/AKF5DVTLF76EHSV7UQQIMGX22B","bundle":"https://pith.science/pith/AKF5DVTLF76EHSV7UQQIMGX22B/bundle.json","state":"https://pith.science/pith/AKF5DVTLF76EHSV7UQQIMGX22B/state.json","well_known_bundle":"https://pith.science/.well-known/pith/AKF5DVTLF76EHSV7UQQIMGX22B/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2015:AKF5DVTLF76EHSV7UQQIMGX22B","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"2de8ee52d0dca9d43de3c3bbfb589b72c0824889ae9a7c37e941308df19498a0","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2015-07-20T15:07:53Z","title_canon_sha256":"b341892ea803cb57681f1d092dd783637da8b48245e4d93f473765fb37e5e0ce"},"schema_version":"1.0","source":{"id":"1507.05523","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1507.05523","created_at":"2026-05-18T01:36:36Z"},{"alias_kind":"arxiv_version","alias_value":"1507.05523v1","created_at":"2026-05-18T01:36:36Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1507.05523","created_at":"2026-05-18T01:36:36Z"},{"alias_kind":"pith_short_12","alias_value":"AKF5DVTLF76E","created_at":"2026-05-18T12:29:10Z"},{"alias_kind":"pith_short_16","alias_value":"AKF5DVTLF76EHSV7","created_at":"2026-05-18T12:29:10Z"},{"alias_kind":"pith_short_8","alias_value":"AKF5DVTL","created_at":"2026-05-18T12:29:10Z"}],"graph_snapshots":[{"event_id":"sha256:7ea613e5bb057b6330cbc3327c2bb32a27125791134930b1f1cafa410198189e","target":"graph","created_at":"2026-05-18T01:36:36Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"We analyze three critical components of word embedding training: the model, the corpus, and the training parameters. We systematize existing neural-network-based word embedding algorithms and compare them using the same corpus. We evaluate each word embedding in three ways: analyzing its semantic properties, using it as a feature for supervised tasks and using it to initialize neural networks. We also provide several simple guidelines for training word embeddings. First, we discover that corpus domain is more important than corpus size. We recommend choosing a corpus in a suitable domain for t","authors_text":"Jun Zhao, Kang Liu, Liheng Xu, Siwei Lai","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2015-07-20T15:07:53Z","title":"How to Generate a Good Word Embedding?"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1507.05523","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:90a4d1395247c9713675b1471d668fe3ef94db30f189e3ad488a99038a5c581b","target":"record","created_at":"2026-05-18T01:36:36Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"2de8ee52d0dca9d43de3c3bbfb589b72c0824889ae9a7c37e941308df19498a0","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2015-07-20T15:07:53Z","title_canon_sha256":"b341892ea803cb57681f1d092dd783637da8b48245e4d93f473765fb37e5e0ce"},"schema_version":"1.0","source":{"id":"1507.05523","kind":"arxiv","version":1}},"canonical_sha256":"028bd1d66b2ffc43cabfa420861afad0543f3502f4e3d5172134f1d194a0d3b6","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"028bd1d66b2ffc43cabfa420861afad0543f3502f4e3d5172134f1d194a0d3b6","first_computed_at":"2026-05-18T01:36:36.597623Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T01:36:36.597623Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"JNQEclmIZKUJRsbmTebcd5HW1VLWXM0P8xwJDyN2rrYIFuY48WKTh6JtmaKPorIUMIEcYEk+QbU+j9ti3rajAg==","signature_status":"signed_v1","signed_at":"2026-05-18T01:36:36.598126Z","signed_message":"canonical_sha256_bytes"},"source_id":"1507.05523","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:90a4d1395247c9713675b1471d668fe3ef94db30f189e3ad488a99038a5c581b","sha256:7ea613e5bb057b6330cbc3327c2bb32a27125791134930b1f1cafa410198189e"],"state_sha256":"fec06e983bb3f4d2421a448541525e6ec2929c4d518513c652a621dd333fbd6a"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"33VHj9BNTkiuweUWOUT6xTHFu8IRskvR1NHb3Q/O+K+TIcfy/qKyRlQCMFK7Q3VP8EQ+V+ZXa0aDcLv1cc9UDQ==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-28T18:14:13.830399Z","bundle_sha256":"7c8ebaaebd4ee3b47b839c35827e1b15bb035adec7b75a48a0bcb37a80773afb"}}