{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2018:R4MEIF23WRH62NRTOUVB2VEMJG","short_pith_number":"pith:R4MEIF23","schema_version":"1.0","canonical_sha256":"8f1844175bb44fed3633752a1d548c499ea0d968a2765bea86178016ce355d75","source":{"kind":"arxiv","id":"1801.07736","version":3},"attestation_state":"computed","paper":{"title":"MaskGAN: Better Text Generation via Filling in the______","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","cs.LG"],"primary_cat":"stat.ML","authors_text":"Andrew M. Dai, Ian Goodfellow, William Fedus","submitted_at":"2018-01-23T19:22:21Z","abstract_excerpt":"Neural text generation models are often autoregressive language models or seq2seq models. These models generate text by sampling words sequentially, with each word conditioned on the previous word, and are state-of-the-art for several machine translation and summarization benchmarks. These benchmarks are often defined by validation perplexity even though this is not a direct measure of the quality of the generated text. Additionally, these models are typically trained via maxi- mum likelihood and teacher forcing. These methods are well-suited to optimizing perplexity but can result in poor sam"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1801.07736","kind":"arxiv","version":3},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"stat.ML","submitted_at":"2018-01-23T19:22:21Z","cross_cats_sorted":["cs.AI","cs.LG"],"title_canon_sha256":"2700becceed089b6e6b7ff22a5f98ffb36a6312eb15b86673663e52ad434b9e3","abstract_canon_sha256":"48deb6d143fe6213b6cb08bbead961b3ffa970239f4852638255e29d6a1a27b7"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:22:11.881556Z","signature_b64":"7c167C3F0xyxsaWbbWfVWkFZNWSUSNvVsc54qWeW2p2XeyA5nBc/Hmo2mORm/ABpQFrqLwVu9PIrZPEm4iljAg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"8f1844175bb44fed3633752a1d548c499ea0d968a2765bea86178016ce355d75","last_reissued_at":"2026-05-18T00:22:11.880983Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:22:11.880983Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"MaskGAN: Better Text Generation via Filling in the______","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","cs.LG"],"primary_cat":"stat.ML","authors_text":"Andrew M. Dai, Ian Goodfellow, William Fedus","submitted_at":"2018-01-23T19:22:21Z","abstract_excerpt":"Neural text generation models are often autoregressive language models or seq2seq models. These models generate text by sampling words sequentially, with each word conditioned on the previous word, and are state-of-the-art for several machine translation and summarization benchmarks. These benchmarks are often defined by validation perplexity even though this is not a direct measure of the quality of the generated text. Additionally, these models are typically trained via maxi- mum likelihood and teacher forcing. These methods are well-suited to optimizing perplexity but can result in poor sam"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1801.07736","kind":"arxiv","version":3},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1801.07736","created_at":"2026-05-18T00:22:11.881074+00:00"},{"alias_kind":"arxiv_version","alias_value":"1801.07736v3","created_at":"2026-05-18T00:22:11.881074+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1801.07736","created_at":"2026-05-18T00:22:11.881074+00:00"},{"alias_kind":"pith_short_12","alias_value":"R4MEIF23WRH6","created_at":"2026-05-18T12:32:50.500415+00:00"},{"alias_kind":"pith_short_16","alias_value":"R4MEIF23WRH62NRT","created_at":"2026-05-18T12:32:50.500415+00:00"},{"alias_kind":"pith_short_8","alias_value":"R4MEIF23","created_at":"2026-05-18T12:32:50.500415+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":7,"internal_anchor_count":5,"sample":[{"citing_arxiv_id":"1907.07001","citing_title":"Latent Adversarial Defence with Boundary-guided Generation","ref_index":5,"is_internal_anchor":true},{"citing_arxiv_id":"1907.09987","citing_title":"Bayesian Inference with Generative Adversarial Network Priors","ref_index":32,"is_internal_anchor":true},{"citing_arxiv_id":"1906.08237","citing_title":"XLNet: Generalized Autoregressive Pretraining for Language Understanding","ref_index":11,"is_internal_anchor":true},{"citing_arxiv_id":"2207.14255","citing_title":"Efficient Training of Language Models to Fill in the Middle","ref_index":110,"is_internal_anchor":true},{"citing_arxiv_id":"2207.14255","citing_title":"Efficient Training of Language Models to Fill in the Middle","ref_index":83,"is_internal_anchor":true},{"citing_arxiv_id":"2101.03961","citing_title":"Switch Transformers: Scaling to Trillion Parameter Models with Simple and Efficient Sparsity","ref_index":10,"is_internal_anchor":false},{"citing_arxiv_id":"2202.08906","citing_title":"ST-MoE: Designing Stable and Transferable Sparse Expert Models","ref_index":147,"is_internal_anchor":false}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/R4MEIF23WRH62NRTOUVB2VEMJG","json":"https://pith.science/pith/R4MEIF23WRH62NRTOUVB2VEMJG.json","graph_json":"https://pith.science/api/pith-number/R4MEIF23WRH62NRTOUVB2VEMJG/graph.json","events_json":"https://pith.science/api/pith-number/R4MEIF23WRH62NRTOUVB2VEMJG/events.json","paper":"https://pith.science/paper/R4MEIF23"},"agent_actions":{"view_html":"https://pith.science/pith/R4MEIF23WRH62NRTOUVB2VEMJG","download_json":"https://pith.science/pith/R4MEIF23WRH62NRTOUVB2VEMJG.json","view_paper":"https://pith.science/paper/R4MEIF23","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1801.07736&json=true","fetch_graph":"https://pith.science/api/pith-number/R4MEIF23WRH62NRTOUVB2VEMJG/graph.json","fetch_events":"https://pith.science/api/pith-number/R4MEIF23WRH62NRTOUVB2VEMJG/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/R4MEIF23WRH62NRTOUVB2VEMJG/action/timestamp_anchor","attest_storage":"https://pith.science/pith/R4MEIF23WRH62NRTOUVB2VEMJG/action/storage_attestation","attest_author":"https://pith.science/pith/R4MEIF23WRH62NRTOUVB2VEMJG/action/author_attestation","sign_citation":"https://pith.science/pith/R4MEIF23WRH62NRTOUVB2VEMJG/action/citation_signature","submit_replication":"https://pith.science/pith/R4MEIF23WRH62NRTOUVB2VEMJG/action/replication_record"}},"created_at":"2026-05-18T00:22:11.881074+00:00","updated_at":"2026-05-18T00:22:11.881074+00:00"}