{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2023:UTRDYT6CJUBBSHFVA7YYXGCZKC","short_pith_number":"pith:UTRDYT6C","canonical_record":{"source":{"id":"2302.08453","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2023-02-16T17:56:08Z","cross_cats_sorted":["cs.AI","cs.LG","cs.MM"],"title_canon_sha256":"3493f9a6a3b749cc00c722a39ae608f90bd5448d955798810d630ed3d7ddd30a","abstract_canon_sha256":"e545ed690231d735387208fe8386e4305dfae265b4b9b2599e1aabb9efe9bccf"},"schema_version":"1.0"},"canonical_sha256":"a4e23c4fc24d02191cb507f18b9859509c58f3dc6a9de13d2dce55948f469b3c","source":{"kind":"arxiv","id":"2302.08453","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2302.08453","created_at":"2026-05-17T23:38:46Z"},{"alias_kind":"arxiv_version","alias_value":"2302.08453v2","created_at":"2026-05-17T23:38:46Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2302.08453","created_at":"2026-05-17T23:38:46Z"},{"alias_kind":"pith_short_12","alias_value":"UTRDYT6CJUBB","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_16","alias_value":"UTRDYT6CJUBBSHFV","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_8","alias_value":"UTRDYT6C","created_at":"2026-05-18T12:33:37Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2023:UTRDYT6CJUBBSHFVA7YYXGCZKC","target":"record","payload":{"canonical_record":{"source":{"id":"2302.08453","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2023-02-16T17:56:08Z","cross_cats_sorted":["cs.AI","cs.LG","cs.MM"],"title_canon_sha256":"3493f9a6a3b749cc00c722a39ae608f90bd5448d955798810d630ed3d7ddd30a","abstract_canon_sha256":"e545ed690231d735387208fe8386e4305dfae265b4b9b2599e1aabb9efe9bccf"},"schema_version":"1.0"},"canonical_sha256":"a4e23c4fc24d02191cb507f18b9859509c58f3dc6a9de13d2dce55948f469b3c","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:38:46.340209Z","signature_b64":"RNnCUV31pQzYoHrrOm9SuWEBcRf6GnK+mtacLRvHj2kCVHOX4fxlovkq0xFJUQ8qr4JQyAtcLk6wSYwGUZGTDw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"a4e23c4fc24d02191cb507f18b9859509c58f3dc6a9de13d2dce55948f469b3c","last_reissued_at":"2026-05-17T23:38:46.339664Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:38:46.339664Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2302.08453","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:38:46Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"mempn50vsytx5r8LJk/eftATK+fqIMm5Sabks2R9arzl0Pw+K10oM8IowKPY1jGxECpQwm8byHOrOH3jXFCEBw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-28T23:35:47.984863Z"},"content_sha256":"da5411ee7a45d2746a6f34d313f17b928cae8b903765c1f75accc431b4286fb0","schema_version":"1.0","event_id":"sha256:da5411ee7a45d2746a6f34d313f17b928cae8b903765c1f75accc431b4286fb0"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2023:UTRDYT6CJUBBSHFVA7YYXGCZKC","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"T2I-Adapter: Learning Adapters to Dig out More Controllable Ability for Text-to-Image Diffusion Models","license":"http://creativecommons.org/licenses/by/4.0/","headline":"Lightweight adapters align external signals with the internal knowledge of frozen text-to-image diffusion models.","cross_cats":["cs.AI","cs.LG","cs.MM"],"primary_cat":"cs.CV","authors_text":"Chong Mou, Jian Zhang, Liangbin Xie, Xiaohu Qie, Xintao Wang, Yanze Wu, Ying Shan, Zhongang Qi","submitted_at":"2023-02-16T17:56:08Z","abstract_excerpt":"The incredible generative ability of large-scale text-to-image (T2I) models has demonstrated strong power of learning complex structures and meaningful semantics. However, relying solely on text prompts cannot fully take advantage of the knowledge learned by the model, especially when flexible and accurate controlling (e.g., color and structure) is needed. In this paper, we aim to ``dig out\" the capabilities that T2I models have implicitly learned, and then explicitly use them to control the generation more granularly. Specifically, we propose to learn simple and lightweight T2I-Adapters to al"},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"we propose to learn simple and lightweight T2I-Adapters to align internal knowledge in T2I models with external control signals, while freezing the original large T2I models.","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"That the internal knowledge implicitly learned by large T2I models can be effectively aligned with external control signals using simple lightweight adapters without degrading generative quality or requiring full model retraining.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"T2I-Adapters are lightweight modules that enable fine-grained control over color and structure in text-to-image diffusion models by aligning external conditions with the frozen model's internal knowledge.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"Lightweight adapters align external signals with the internal knowledge of frozen text-to-image diffusion models.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"b527280d0607c40b6b5f40438bcb17f874526f01f80c619b9587e7395a95ffab"},"source":{"id":"2302.08453","kind":"arxiv","version":2},"verdict":{"id":"7c68bf8c-0a5c-4e4e-8952-60b6d2ec499c","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-16T22:44:07.595923Z","strongest_claim":"we propose to learn simple and lightweight T2I-Adapters to align internal knowledge in T2I models with external control signals, while freezing the original large T2I models.","one_line_summary":"T2I-Adapters are lightweight modules that enable fine-grained control over color and structure in text-to-image diffusion models by aligning external conditions with the frozen model's internal knowledge.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"That the internal knowledge implicitly learned by large T2I models can be effectively aligned with external control signals using simple lightweight adapters without degrading generative quality or requiring full model retraining.","pith_extraction_headline":"Lightweight adapters align external signals with the internal knowledge of frozen text-to-image diffusion models."},"references":{"count":47,"sample":[{"doi":"","year":2022,"title":"eDiff-I: Text-to-Image Diffusion Models with an Ensemble of Expert Denoisers","work_id":"2cd7b629-ab37-4ce5-b51e-aa4d99547468","ref_index":1,"cited_arxiv_id":"2211.01324","is_internal_anchor":true},{"doi":"","year":2018,"title":"Coco- stuff: Thing and stuff classes in context","work_id":"649caf1e-4b1d-47df-83fd-95d8f230ac97","ref_index":2,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2022,"title":"Vision transformer adapter for dense predictions","work_id":"2371694e-3e9f-4892-a1a2-f64b28d4b349","ref_index":3,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2020,"title":"Openmmlab pose estimation toolbox and benchmark","work_id":"c2f0a614-b463-40d8-9007-d3ab5f2f0d14","ref_index":4,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2018,"title":"Gen- erative adversarial networks: An overview","work_id":"9a75560f-8b87-4ce7-a03c-f2bba15dbf27","ref_index":5,"cited_arxiv_id":"","is_internal_anchor":false}],"resolved_work":47,"snapshot_sha256":"c82d31b5ac7b3de42f604c9fad087aa16850081e6f79bd3bd5c5a34c20732e36","internal_anchors":7},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":"7c68bf8c-0a5c-4e4e-8952-60b6d2ec499c"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:38:46Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"J3umdVuvew6H0HrW6tTU4xA+jEzCkXWELoRLbvI1+LH5zP/BZFuUC/qT3qgLFgfbN62DxLpyGVsysa12HOnWCw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-28T23:35:47.985963Z"},"content_sha256":"e0f54081fc0fefe4893e2c38956eb0b6ae6f046416bf82045b2f8be615fbea69","schema_version":"1.0","event_id":"sha256:e0f54081fc0fefe4893e2c38956eb0b6ae6f046416bf82045b2f8be615fbea69"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/UTRDYT6CJUBBSHFVA7YYXGCZKC/bundle.json","state_url":"https://pith.science/pith/UTRDYT6CJUBBSHFVA7YYXGCZKC/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/UTRDYT6CJUBBSHFVA7YYXGCZKC/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-28T23:35:47Z","links":{"resolver":"https://pith.science/pith/UTRDYT6CJUBBSHFVA7YYXGCZKC","bundle":"https://pith.science/pith/UTRDYT6CJUBBSHFVA7YYXGCZKC/bundle.json","state":"https://pith.science/pith/UTRDYT6CJUBBSHFVA7YYXGCZKC/state.json","well_known_bundle":"https://pith.science/.well-known/pith/UTRDYT6CJUBBSHFVA7YYXGCZKC/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2023:UTRDYT6CJUBBSHFVA7YYXGCZKC","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"e545ed690231d735387208fe8386e4305dfae265b4b9b2599e1aabb9efe9bccf","cross_cats_sorted":["cs.AI","cs.LG","cs.MM"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2023-02-16T17:56:08Z","title_canon_sha256":"3493f9a6a3b749cc00c722a39ae608f90bd5448d955798810d630ed3d7ddd30a"},"schema_version":"1.0","source":{"id":"2302.08453","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2302.08453","created_at":"2026-05-17T23:38:46Z"},{"alias_kind":"arxiv_version","alias_value":"2302.08453v2","created_at":"2026-05-17T23:38:46Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2302.08453","created_at":"2026-05-17T23:38:46Z"},{"alias_kind":"pith_short_12","alias_value":"UTRDYT6CJUBB","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_16","alias_value":"UTRDYT6CJUBBSHFV","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_8","alias_value":"UTRDYT6C","created_at":"2026-05-18T12:33:37Z"}],"graph_snapshots":[{"event_id":"sha256:e0f54081fc0fefe4893e2c38956eb0b6ae6f046416bf82045b2f8be615fbea69","target":"graph","created_at":"2026-05-17T23:38:46Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"we propose to learn simple and lightweight T2I-Adapters to align internal knowledge in T2I models with external control signals, while freezing the original large T2I models."},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"That the internal knowledge implicitly learned by large T2I models can be effectively aligned with external control signals using simple lightweight adapters without degrading generative quality or requiring full model retraining."},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"T2I-Adapters are lightweight modules that enable fine-grained control over color and structure in text-to-image diffusion models by aligning external conditions with the frozen model's internal knowledge."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"Lightweight adapters align external signals with the internal knowledge of frozen text-to-image diffusion models."}],"snapshot_sha256":"b527280d0607c40b6b5f40438bcb17f874526f01f80c619b9587e7395a95ffab"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"The incredible generative ability of large-scale text-to-image (T2I) models has demonstrated strong power of learning complex structures and meaningful semantics. However, relying solely on text prompts cannot fully take advantage of the knowledge learned by the model, especially when flexible and accurate controlling (e.g., color and structure) is needed. In this paper, we aim to ``dig out\" the capabilities that T2I models have implicitly learned, and then explicitly use them to control the generation more granularly. Specifically, we propose to learn simple and lightweight T2I-Adapters to al","authors_text":"Chong Mou, Jian Zhang, Liangbin Xie, Xiaohu Qie, Xintao Wang, Yanze Wu, Ying Shan, Zhongang Qi","cross_cats":["cs.AI","cs.LG","cs.MM"],"headline":"Lightweight adapters align external signals with the internal knowledge of frozen text-to-image diffusion models.","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2023-02-16T17:56:08Z","title":"T2I-Adapter: Learning Adapters to Dig out More Controllable Ability for Text-to-Image Diffusion Models"},"references":{"count":47,"internal_anchors":7,"resolved_work":47,"sample":[{"cited_arxiv_id":"2211.01324","doi":"","is_internal_anchor":true,"ref_index":1,"title":"eDiff-I: Text-to-Image Diffusion Models with an Ensemble of Expert Denoisers","work_id":"2cd7b629-ab37-4ce5-b51e-aa4d99547468","year":2022},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":2,"title":"Coco- stuff: Thing and stuff classes in context","work_id":"649caf1e-4b1d-47df-83fd-95d8f230ac97","year":2018},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":3,"title":"Vision transformer adapter for dense predictions","work_id":"2371694e-3e9f-4892-a1a2-f64b28d4b349","year":2022},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":4,"title":"Openmmlab pose estimation toolbox and benchmark","work_id":"c2f0a614-b463-40d8-9007-d3ab5f2f0d14","year":2020},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":5,"title":"Gen- erative adversarial networks: An overview","work_id":"9a75560f-8b87-4ce7-a03c-f2bba15dbf27","year":2018}],"snapshot_sha256":"c82d31b5ac7b3de42f604c9fad087aa16850081e6f79bd3bd5c5a34c20732e36"},"source":{"id":"2302.08453","kind":"arxiv","version":2},"verdict":{"created_at":"2026-05-16T22:44:07.595923Z","id":"7c68bf8c-0a5c-4e4e-8952-60b6d2ec499c","model_set":{"reader":"grok-4.3"},"one_line_summary":"T2I-Adapters are lightweight modules that enable fine-grained control over color and structure in text-to-image diffusion models by aligning external conditions with the frozen model's internal knowledge.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"Lightweight adapters align external signals with the internal knowledge of frozen text-to-image diffusion models.","strongest_claim":"we propose to learn simple and lightweight T2I-Adapters to align internal knowledge in T2I models with external control signals, while freezing the original large T2I models.","weakest_assumption":"That the internal knowledge implicitly learned by large T2I models can be effectively aligned with external control signals using simple lightweight adapters without degrading generative quality or requiring full model retraining."}},"verdict_id":"7c68bf8c-0a5c-4e4e-8952-60b6d2ec499c"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:da5411ee7a45d2746a6f34d313f17b928cae8b903765c1f75accc431b4286fb0","target":"record","created_at":"2026-05-17T23:38:46Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"e545ed690231d735387208fe8386e4305dfae265b4b9b2599e1aabb9efe9bccf","cross_cats_sorted":["cs.AI","cs.LG","cs.MM"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2023-02-16T17:56:08Z","title_canon_sha256":"3493f9a6a3b749cc00c722a39ae608f90bd5448d955798810d630ed3d7ddd30a"},"schema_version":"1.0","source":{"id":"2302.08453","kind":"arxiv","version":2}},"canonical_sha256":"a4e23c4fc24d02191cb507f18b9859509c58f3dc6a9de13d2dce55948f469b3c","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"a4e23c4fc24d02191cb507f18b9859509c58f3dc6a9de13d2dce55948f469b3c","first_computed_at":"2026-05-17T23:38:46.339664Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:38:46.339664Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"RNnCUV31pQzYoHrrOm9SuWEBcRf6GnK+mtacLRvHj2kCVHOX4fxlovkq0xFJUQ8qr4JQyAtcLk6wSYwGUZGTDw==","signature_status":"signed_v1","signed_at":"2026-05-17T23:38:46.340209Z","signed_message":"canonical_sha256_bytes"},"source_id":"2302.08453","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:da5411ee7a45d2746a6f34d313f17b928cae8b903765c1f75accc431b4286fb0","sha256:e0f54081fc0fefe4893e2c38956eb0b6ae6f046416bf82045b2f8be615fbea69"],"state_sha256":"59d7cbaede2dc6dba9b0362ab35125706b48c17fd8d794e62d77f8980f247e4d"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"k2AMMX5wZnLOcsJgUWcCtfeCCAVRqGq+DJdwgi2rd0ZquNuayxRdpaZzCV28hpl9RM/Ufog8Pify+FQWarG7DQ==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-28T23:35:47.990477Z","bundle_sha256":"94d4a5aa92cf6036c1b9fe71611e5b66547f37baeaa3ca2e3b25d692abd4fb6c"}}