{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2019:2OY4AJXYFYHCSVKV6LJBKV4IKG","short_pith_number":"pith:2OY4AJXY","schema_version":"1.0","canonical_sha256":"d3b1c026f82e0e295555f2d215578851b0cd9cc65edf8c36d7af850f03c9e2f4","source":{"kind":"arxiv","id":"1902.11269","version":1},"attestation_state":"computed","paper":{"title":"Efficient Contextual Representation Learning Without Softmax Layer","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.LG"],"primary_cat":"cs.CL","authors_text":"Cho-Jui Hsieh, Kai-Wei Chang, Liunian Harold Li, Patrick H. Chen","submitted_at":"2019-02-28T18:19:14Z","abstract_excerpt":"Contextual representation models have achieved great success in improving various downstream tasks. However, these language-model-based encoders are difficult to train due to the large parameter sizes and high computational complexity. By carefully examining the training procedure, we find that the softmax layer (the output layer) causes significant inefficiency due to the large vocabulary size. Therefore, we redesign the learning objective and propose an efficient framework for training contextual representation models. Specifically, the proposed approach bypasses the softmax layer by perform"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1902.11269","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2019-02-28T18:19:14Z","cross_cats_sorted":["cs.LG"],"title_canon_sha256":"cab6a1523b9b42392d21c85b04a5f0c29a46a5876e8ba0ddd3753a5b5230bf17","abstract_canon_sha256":"bbfef06787a38105193341464cdf1be20af61e291f2b83e799b55870bd62f3de"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:52:25.050055Z","signature_b64":"OkKFi43J2BGujB15SwY8POvDud5Y88CEawYjiSFappyVveUP++Xp6QiJXRCLRKbEQuqXHnOtlV734O2u5efwAg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"d3b1c026f82e0e295555f2d215578851b0cd9cc65edf8c36d7af850f03c9e2f4","last_reissued_at":"2026-05-17T23:52:25.049548Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:52:25.049548Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Efficient Contextual Representation Learning Without Softmax Layer","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.LG"],"primary_cat":"cs.CL","authors_text":"Cho-Jui Hsieh, Kai-Wei Chang, Liunian Harold Li, Patrick H. Chen","submitted_at":"2019-02-28T18:19:14Z","abstract_excerpt":"Contextual representation models have achieved great success in improving various downstream tasks. However, these language-model-based encoders are difficult to train due to the large parameter sizes and high computational complexity. By carefully examining the training procedure, we find that the softmax layer (the output layer) causes significant inefficiency due to the large vocabulary size. Therefore, we redesign the learning objective and propose an efficient framework for training contextual representation models. Specifically, the proposed approach bypasses the softmax layer by perform"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1902.11269","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1902.11269","created_at":"2026-05-17T23:52:25.049630+00:00"},{"alias_kind":"arxiv_version","alias_value":"1902.11269v1","created_at":"2026-05-17T23:52:25.049630+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1902.11269","created_at":"2026-05-17T23:52:25.049630+00:00"},{"alias_kind":"pith_short_12","alias_value":"2OY4AJXYFYHC","created_at":"2026-05-18T12:33:07.085635+00:00"},{"alias_kind":"pith_short_16","alias_value":"2OY4AJXYFYHCSVKV","created_at":"2026-05-18T12:33:07.085635+00:00"},{"alias_kind":"pith_short_8","alias_value":"2OY4AJXY","created_at":"2026-05-18T12:33:07.085635+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/2OY4AJXYFYHCSVKV6LJBKV4IKG","json":"https://pith.science/pith/2OY4AJXYFYHCSVKV6LJBKV4IKG.json","graph_json":"https://pith.science/api/pith-number/2OY4AJXYFYHCSVKV6LJBKV4IKG/graph.json","events_json":"https://pith.science/api/pith-number/2OY4AJXYFYHCSVKV6LJBKV4IKG/events.json","paper":"https://pith.science/paper/2OY4AJXY"},"agent_actions":{"view_html":"https://pith.science/pith/2OY4AJXYFYHCSVKV6LJBKV4IKG","download_json":"https://pith.science/pith/2OY4AJXYFYHCSVKV6LJBKV4IKG.json","view_paper":"https://pith.science/paper/2OY4AJXY","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1902.11269&json=true","fetch_graph":"https://pith.science/api/pith-number/2OY4AJXYFYHCSVKV6LJBKV4IKG/graph.json","fetch_events":"https://pith.science/api/pith-number/2OY4AJXYFYHCSVKV6LJBKV4IKG/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/2OY4AJXYFYHCSVKV6LJBKV4IKG/action/timestamp_anchor","attest_storage":"https://pith.science/pith/2OY4AJXYFYHCSVKV6LJBKV4IKG/action/storage_attestation","attest_author":"https://pith.science/pith/2OY4AJXYFYHCSVKV6LJBKV4IKG/action/author_attestation","sign_citation":"https://pith.science/pith/2OY4AJXYFYHCSVKV6LJBKV4IKG/action/citation_signature","submit_replication":"https://pith.science/pith/2OY4AJXYFYHCSVKV6LJBKV4IKG/action/replication_record"}},"created_at":"2026-05-17T23:52:25.049630+00:00","updated_at":"2026-05-17T23:52:25.049630+00:00"}