{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2014:7LZYOUX7Q2QXCFUBVA25N2RKME","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"4ead72a04b747817c344a079c1774e1bd7156a14401bc9fe0f8a0a639474078f","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2014-12-06T04:40:18Z","title_canon_sha256":"772861fa17322445ff243ba8eb555d7362f90822e26777cfda4a27da0689c7b8"},"schema_version":"1.0","source":{"id":"1412.2197","kind":"arxiv","version":3}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1412.2197","created_at":"2026-05-18T01:59:58Z"},{"alias_kind":"arxiv_version","alias_value":"1412.2197v3","created_at":"2026-05-18T01:59:58Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1412.2197","created_at":"2026-05-18T01:59:58Z"},{"alias_kind":"pith_short_12","alias_value":"7LZYOUX7Q2QX","created_at":"2026-05-18T12:28:19Z"},{"alias_kind":"pith_short_16","alias_value":"7LZYOUX7Q2QXCFUB","created_at":"2026-05-18T12:28:19Z"},{"alias_kind":"pith_short_8","alias_value":"7LZYOUX7","created_at":"2026-05-18T12:28:19Z"}],"graph_snapshots":[{"event_id":"sha256:6a45ea2853cbe9c5da9ff8138fb1072b19e41e16108844e0552211b42990e9e0","target":"graph","created_at":"2026-05-18T01:59:58Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Synonym extraction is an important task in natural language processing and often used as a submodule in query expansion, question answering and other applications. Automatic synonym extractor is highly preferred for large scale applications. Previous studies in synonym extraction are most limited to small scale datasets. In this paper, we build a large dataset with 3.4 million synonym/non-synonym pairs to capture the challenges in real world scenarios. We proposed (1) a new cost function to accommodate the unbalanced learning problem, and (2) a feature learning based deep neural network to mod","authors_text":"Chang Wang, Liangliang Cao","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2014-12-06T04:40:18Z","title":"Practice in Synonym Extraction at Large Scale"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1412.2197","kind":"arxiv","version":3},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:ac5f3c90ce87974dad03d3208c6fe6e0ff80d18be513d0dc19cfb7770a4a592f","target":"record","created_at":"2026-05-18T01:59:58Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"4ead72a04b747817c344a079c1774e1bd7156a14401bc9fe0f8a0a639474078f","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2014-12-06T04:40:18Z","title_canon_sha256":"772861fa17322445ff243ba8eb555d7362f90822e26777cfda4a27da0689c7b8"},"schema_version":"1.0","source":{"id":"1412.2197","kind":"arxiv","version":3}},"canonical_sha256":"faf38752ff86a1711681a835d6ea2a6120e514da5c5a4cefb3302efe6808c7d4","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"faf38752ff86a1711681a835d6ea2a6120e514da5c5a4cefb3302efe6808c7d4","first_computed_at":"2026-05-18T01:59:58.056653Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T01:59:58.056653Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"nDaPVF74P/eM8aZjavr8q5mCdz8hIqdiBgaAJl/hM5xZIWA3yh0KKG4+QN8WfZ6On6XwuRLtXzGz7fagwpvQCA==","signature_status":"signed_v1","signed_at":"2026-05-18T01:59:58.057344Z","signed_message":"canonical_sha256_bytes"},"source_id":"1412.2197","source_kind":"arxiv","source_version":3}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:ac5f3c90ce87974dad03d3208c6fe6e0ff80d18be513d0dc19cfb7770a4a592f","sha256:6a45ea2853cbe9c5da9ff8138fb1072b19e41e16108844e0552211b42990e9e0"],"state_sha256":"54bad3fd128b92fd87dee0bf4ad44ed67b9f68acf51d9778fecf74f25c9d18ad"}