{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2022:OLB7QIXLUPLJL74WIPMKPDRIPZ","short_pith_number":"pith:OLB7QIXL","schema_version":"1.0","canonical_sha256":"72c3f822eba3d695ff9643d8a78e287e4dcfdf2c8afceb38881842534eaa501e","source":{"kind":"arxiv","id":"2210.15257","version":2},"attestation_state":"computed","paper":{"title":"ERNIE-ViLG 2.0: Improving Text-to-Image Diffusion Model with Knowledge-Enhanced Mixture-of-Denoising-Experts","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.CV","authors_text":"HaiFeng Wang, Hao Tian, Hua Wu, Jiaxiang Liu, Lanxin Li, Li Chen, Shikun Feng, Weichong Yin, Xintong Yu, Xuyi Chen, Yewei Fang, Yu Sun, Yuxiang Lu, Zhenyu Zhang, Zhida Feng","submitted_at":"2022-10-27T08:21:35Z","abstract_excerpt":"Recent progress in diffusion models has revolutionized the popular technology of text-to-image generation. While existing approaches could produce photorealistic high-resolution images with text conditions, there are still several open problems to be solved, which limits the further improvement of image fidelity and text relevancy. In this paper, we propose ERNIE-ViLG 2.0, a large-scale Chinese text-to-image diffusion model, to progressively upgrade the quality of generated images by: (1) incorporating fine-grained textual and visual knowledge of key elements in the scene, and (2) utilizing di"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2210.15257","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2022-10-27T08:21:35Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"1413035bdc3d01454e1793ffb401bbf5f9078e2271c366e2812996ea16a44ef9","abstract_canon_sha256":"539cfe3a065ebdca37a3f1625f43edb529a775566ca1cf15d3b117f5a410b340"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-07-05T05:55:13.535094Z","signature_b64":"w9m67FmN3yVr6lZBriJ0RxV5JZ8HXSoTAqLOjgFJCHP8DM/pqxZqUZc0/qnNHUGCM8D8Is7cMScrhHSVHixbBQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"72c3f822eba3d695ff9643d8a78e287e4dcfdf2c8afceb38881842534eaa501e","last_reissued_at":"2026-07-05T05:55:13.534674Z","signature_status":"signed_v1","first_computed_at":"2026-07-05T05:55:13.534674Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"ERNIE-ViLG 2.0: Improving Text-to-Image Diffusion Model with Knowledge-Enhanced Mixture-of-Denoising-Experts","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.CV","authors_text":"HaiFeng Wang, Hao Tian, Hua Wu, Jiaxiang Liu, Lanxin Li, Li Chen, Shikun Feng, Weichong Yin, Xintong Yu, Xuyi Chen, Yewei Fang, Yu Sun, Yuxiang Lu, Zhenyu Zhang, Zhida Feng","submitted_at":"2022-10-27T08:21:35Z","abstract_excerpt":"Recent progress in diffusion models has revolutionized the popular technology of text-to-image generation. While existing approaches could produce photorealistic high-resolution images with text conditions, there are still several open problems to be solved, which limits the further improvement of image fidelity and text relevancy. In this paper, we propose ERNIE-ViLG 2.0, a large-scale Chinese text-to-image diffusion model, to progressively upgrade the quality of generated images by: (1) incorporating fine-grained textual and visual knowledge of key elements in the scene, and (2) utilizing di"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2210.15257","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2210.15257/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2210.15257","created_at":"2026-07-05T05:55:13.534733+00:00"},{"alias_kind":"arxiv_version","alias_value":"2210.15257v2","created_at":"2026-07-05T05:55:13.534733+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2210.15257","created_at":"2026-07-05T05:55:13.534733+00:00"},{"alias_kind":"pith_short_12","alias_value":"OLB7QIXLUPLJ","created_at":"2026-07-05T05:55:13.534733+00:00"},{"alias_kind":"pith_short_16","alias_value":"OLB7QIXLUPLJL74W","created_at":"2026-07-05T05:55:13.534733+00:00"},{"alias_kind":"pith_short_8","alias_value":"OLB7QIXL","created_at":"2026-07-05T05:55:13.534733+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":2,"internal_anchor_count":0,"sample":[{"citing_arxiv_id":"2305.02463","citing_title":"Shap-E: Generating Conditional 3D Implicit Functions","ref_index":15,"is_internal_anchor":false},{"citing_arxiv_id":"2604.20156","citing_title":"Temporally Extended Mixture-of-Experts Models","ref_index":12,"is_internal_anchor":false}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/OLB7QIXLUPLJL74WIPMKPDRIPZ","json":"https://pith.science/pith/OLB7QIXLUPLJL74WIPMKPDRIPZ.json","graph_json":"https://pith.science/api/pith-number/OLB7QIXLUPLJL74WIPMKPDRIPZ/graph.json","events_json":"https://pith.science/api/pith-number/OLB7QIXLUPLJL74WIPMKPDRIPZ/events.json","paper":"https://pith.science/paper/OLB7QIXL"},"agent_actions":{"view_html":"https://pith.science/pith/OLB7QIXLUPLJL74WIPMKPDRIPZ","download_json":"https://pith.science/pith/OLB7QIXLUPLJL74WIPMKPDRIPZ.json","view_paper":"https://pith.science/paper/OLB7QIXL","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2210.15257&json=true","fetch_graph":"https://pith.science/api/pith-number/OLB7QIXLUPLJL74WIPMKPDRIPZ/graph.json","fetch_events":"https://pith.science/api/pith-number/OLB7QIXLUPLJL74WIPMKPDRIPZ/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/OLB7QIXLUPLJL74WIPMKPDRIPZ/action/timestamp_anchor","attest_storage":"https://pith.science/pith/OLB7QIXLUPLJL74WIPMKPDRIPZ/action/storage_attestation","attest_author":"https://pith.science/pith/OLB7QIXLUPLJL74WIPMKPDRIPZ/action/author_attestation","sign_citation":"https://pith.science/pith/OLB7QIXLUPLJL74WIPMKPDRIPZ/action/citation_signature","submit_replication":"https://pith.science/pith/OLB7QIXLUPLJL74WIPMKPDRIPZ/action/replication_record"}},"created_at":"2026-07-05T05:55:13.534733+00:00","updated_at":"2026-07-05T05:55:13.534733+00:00"}