{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2018:5T5EGICVJJFLS72EODG6TTM52I","short_pith_number":"pith:5T5EGICV","canonical_record":{"source":{"id":"1811.04623","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2018-11-12T09:43:24Z","cross_cats_sorted":[],"title_canon_sha256":"a16cae281767457cf64da7aa73868d635dfde0f06060ab4179a46a20c2584561","abstract_canon_sha256":"b4eee6bbcf48e68aa6a69fe985ff1f8905f78b57c8d92c1af4d45017ae8d611d"},"schema_version":"1.0"},"canonical_sha256":"ecfa4320554a4ab97f4470cde9cd9dd21871d7d8af0c5a635c7f8c6d08bd8c1a","source":{"kind":"arxiv","id":"1811.04623","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1811.04623","created_at":"2026-05-17T23:56:22Z"},{"alias_kind":"arxiv_version","alias_value":"1811.04623v2","created_at":"2026-05-17T23:56:22Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1811.04623","created_at":"2026-05-17T23:56:22Z"},{"alias_kind":"pith_short_12","alias_value":"5T5EGICVJJFL","created_at":"2026-05-18T12:32:08Z"},{"alias_kind":"pith_short_16","alias_value":"5T5EGICVJJFLS72E","created_at":"2026-05-18T12:32:08Z"},{"alias_kind":"pith_short_8","alias_value":"5T5EGICV","created_at":"2026-05-18T12:32:08Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2018:5T5EGICVJJFLS72EODG6TTM52I","target":"record","payload":{"canonical_record":{"source":{"id":"1811.04623","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2018-11-12T09:43:24Z","cross_cats_sorted":[],"title_canon_sha256":"a16cae281767457cf64da7aa73868d635dfde0f06060ab4179a46a20c2584561","abstract_canon_sha256":"b4eee6bbcf48e68aa6a69fe985ff1f8905f78b57c8d92c1af4d45017ae8d611d"},"schema_version":"1.0"},"canonical_sha256":"ecfa4320554a4ab97f4470cde9cd9dd21871d7d8af0c5a635c7f8c6d08bd8c1a","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:56:22.962664Z","signature_b64":"sopRjDyuCu1p+57Pms7e94xXvIly3y/0pEbTPfotbM9IGx8WTxvrABfs1e7lR8+QD71KsfD781ku2bwKLPu2AQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"ecfa4320554a4ab97f4470cde9cd9dd21871d7d8af0c5a635c7f8c6d08bd8c1a","last_reissued_at":"2026-05-17T23:56:22.962061Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:56:22.962061Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1811.04623","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:56:22Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"YeSnUq7FrnhU0CpPqRaM8nOi+r4WqLzZEOESS3indHM4d+adw8E8UhpF9NNjtQoglwl/7omuNjH+y/kUa7PJBw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-20T22:38:08.552684Z"},"content_sha256":"0d376fc73938190d0749c45a563059629bbc80b9564186ce9689d9fa8d163c40","schema_version":"1.0","event_id":"sha256:0d376fc73938190d0749c45a563059629bbc80b9564186ce9689d9fa8d163c40"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2018:5T5EGICVJJFLS72EODG6TTM52I","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Fine-tuning of Language Models with Discriminator","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Mikhail Kudinov, Vadim Popov","submitted_at":"2018-11-12T09:43:24Z","abstract_excerpt":"Cross-entropy loss is a common choice when it comes to multiclass classification tasks and language modeling in particular. Minimizing this loss results in language models of very good quality. We show that it is possible to fine-tune these models and make them perform even better if they are fine-tuned with sum of cross-entropy loss and reverse Kullback-Leibler divergence. The latter is estimated using discriminator network that we train in advance. During fine-tuning probabilities of rare words that are usually underestimated by language models become bigger. The novel approach that we propo"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1811.04623","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:56:22Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"5M1YU1dltdheYGzigeH63x+3hkJCRUkmP+G8NwjrvFOlFxfaoz6zPFE9V+XPtnKwreP7ZqFy+r9vyUj5MijiCg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-20T22:38:08.553051Z"},"content_sha256":"714a02d77a803d73951977b535663c2a08904f396d6accaca395a14677d0f73b","schema_version":"1.0","event_id":"sha256:714a02d77a803d73951977b535663c2a08904f396d6accaca395a14677d0f73b"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/5T5EGICVJJFLS72EODG6TTM52I/bundle.json","state_url":"https://pith.science/pith/5T5EGICVJJFLS72EODG6TTM52I/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/5T5EGICVJJFLS72EODG6TTM52I/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-20T22:38:08Z","links":{"resolver":"https://pith.science/pith/5T5EGICVJJFLS72EODG6TTM52I","bundle":"https://pith.science/pith/5T5EGICVJJFLS72EODG6TTM52I/bundle.json","state":"https://pith.science/pith/5T5EGICVJJFLS72EODG6TTM52I/state.json","well_known_bundle":"https://pith.science/.well-known/pith/5T5EGICVJJFLS72EODG6TTM52I/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2018:5T5EGICVJJFLS72EODG6TTM52I","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"b4eee6bbcf48e68aa6a69fe985ff1f8905f78b57c8d92c1af4d45017ae8d611d","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2018-11-12T09:43:24Z","title_canon_sha256":"a16cae281767457cf64da7aa73868d635dfde0f06060ab4179a46a20c2584561"},"schema_version":"1.0","source":{"id":"1811.04623","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1811.04623","created_at":"2026-05-17T23:56:22Z"},{"alias_kind":"arxiv_version","alias_value":"1811.04623v2","created_at":"2026-05-17T23:56:22Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1811.04623","created_at":"2026-05-17T23:56:22Z"},{"alias_kind":"pith_short_12","alias_value":"5T5EGICVJJFL","created_at":"2026-05-18T12:32:08Z"},{"alias_kind":"pith_short_16","alias_value":"5T5EGICVJJFLS72E","created_at":"2026-05-18T12:32:08Z"},{"alias_kind":"pith_short_8","alias_value":"5T5EGICV","created_at":"2026-05-18T12:32:08Z"}],"graph_snapshots":[{"event_id":"sha256:714a02d77a803d73951977b535663c2a08904f396d6accaca395a14677d0f73b","target":"graph","created_at":"2026-05-17T23:56:22Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Cross-entropy loss is a common choice when it comes to multiclass classification tasks and language modeling in particular. Minimizing this loss results in language models of very good quality. We show that it is possible to fine-tune these models and make them perform even better if they are fine-tuned with sum of cross-entropy loss and reverse Kullback-Leibler divergence. The latter is estimated using discriminator network that we train in advance. During fine-tuning probabilities of rare words that are usually underestimated by language models become bigger. The novel approach that we propo","authors_text":"Mikhail Kudinov, Vadim Popov","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2018-11-12T09:43:24Z","title":"Fine-tuning of Language Models with Discriminator"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1811.04623","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:0d376fc73938190d0749c45a563059629bbc80b9564186ce9689d9fa8d163c40","target":"record","created_at":"2026-05-17T23:56:22Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"b4eee6bbcf48e68aa6a69fe985ff1f8905f78b57c8d92c1af4d45017ae8d611d","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2018-11-12T09:43:24Z","title_canon_sha256":"a16cae281767457cf64da7aa73868d635dfde0f06060ab4179a46a20c2584561"},"schema_version":"1.0","source":{"id":"1811.04623","kind":"arxiv","version":2}},"canonical_sha256":"ecfa4320554a4ab97f4470cde9cd9dd21871d7d8af0c5a635c7f8c6d08bd8c1a","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"ecfa4320554a4ab97f4470cde9cd9dd21871d7d8af0c5a635c7f8c6d08bd8c1a","first_computed_at":"2026-05-17T23:56:22.962061Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:56:22.962061Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"sopRjDyuCu1p+57Pms7e94xXvIly3y/0pEbTPfotbM9IGx8WTxvrABfs1e7lR8+QD71KsfD781ku2bwKLPu2AQ==","signature_status":"signed_v1","signed_at":"2026-05-17T23:56:22.962664Z","signed_message":"canonical_sha256_bytes"},"source_id":"1811.04623","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:0d376fc73938190d0749c45a563059629bbc80b9564186ce9689d9fa8d163c40","sha256:714a02d77a803d73951977b535663c2a08904f396d6accaca395a14677d0f73b"],"state_sha256":"df4b4e8071bbec7919e71d635da5662573ce83e528df2653dd2f40c1525daa7f"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"CEUxe8SzLWtwFfm79QZiAOxkaKzv/Web7t1UlGhXiJ3zlMWqww3UqraKKrHSHNzpfH73i48PPnDKJ29hN8T+Cg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-20T22:38:08.555028Z","bundle_sha256":"70f3cc3ec9af42f473ad1d24297b326551d3994fa8b1d84334f3208241de1b65"}}