{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2022:AT7SNXJ2YUBDO47YGBKZXAWQHS","short_pith_number":"pith:AT7SNXJ2","canonical_record":{"source":{"id":"2208.01626","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2022-08-02T17:55:41Z","cross_cats_sorted":["cs.CL","cs.GR","cs.LG"],"title_canon_sha256":"6f045d47f77f9d62c080c9273555e654308291b07760a0742e4f3abcf0504773","abstract_canon_sha256":"21a051649f91096729f99e9a3accb638b7af913634eb3aa79930b28f7a40f2a6"},"schema_version":"1.0"},"canonical_sha256":"04ff26dd3ac5023773f830559b82d03cbb9b046433b0bcf6f9402b0a74893087","source":{"kind":"arxiv","id":"2208.01626","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2208.01626","created_at":"2026-07-05T04:45:35Z"},{"alias_kind":"arxiv_version","alias_value":"2208.01626v1","created_at":"2026-07-05T04:45:35Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2208.01626","created_at":"2026-07-05T04:45:35Z"},{"alias_kind":"pith_short_12","alias_value":"AT7SNXJ2YUBD","created_at":"2026-07-05T04:45:35Z"},{"alias_kind":"pith_short_16","alias_value":"AT7SNXJ2YUBDO47Y","created_at":"2026-07-05T04:45:35Z"},{"alias_kind":"pith_short_8","alias_value":"AT7SNXJ2","created_at":"2026-07-05T04:45:35Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2022:AT7SNXJ2YUBDO47YGBKZXAWQHS","target":"record","payload":{"canonical_record":{"source":{"id":"2208.01626","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2022-08-02T17:55:41Z","cross_cats_sorted":["cs.CL","cs.GR","cs.LG"],"title_canon_sha256":"6f045d47f77f9d62c080c9273555e654308291b07760a0742e4f3abcf0504773","abstract_canon_sha256":"21a051649f91096729f99e9a3accb638b7af913634eb3aa79930b28f7a40f2a6"},"schema_version":"1.0"},"canonical_sha256":"04ff26dd3ac5023773f830559b82d03cbb9b046433b0bcf6f9402b0a74893087","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-07-05T04:45:35.578727Z","signature_b64":"No2fgxCn43hGi4tOZuTqQt+hXWwvfL+sxRjaN4jgnmKfzSP785yPmSC8OWS6pB50TV97N6IvO9TcgfuulDWKCg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"04ff26dd3ac5023773f830559b82d03cbb9b046433b0bcf6f9402b0a74893087","last_reissued_at":"2026-07-05T04:45:35.578343Z","signature_status":"signed_v1","first_computed_at":"2026-07-05T04:45:35.578343Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2208.01626","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-07-05T04:45:35Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"AZQexKFZOHBTzMPoacfGlCkHO0y9kR2syAnrZsCNUFfr/QOyWu/mtix3nl+9zHnwJuU9p79VrOr6y/Uz/5xeAA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-07-05T10:34:53.016574Z"},"content_sha256":"76275dee9d111842fd82af9c60ad7bad44e31f343ab63ff69301238aae9bb594","schema_version":"1.0","event_id":"sha256:76275dee9d111842fd82af9c60ad7bad44e31f343ab63ff69301238aae9bb594"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2022:AT7SNXJ2YUBDO47YGBKZXAWQHS","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Prompt-to-Prompt Image Editing with Cross Attention Control","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"Cross-attention layers let users edit images by changing only the text prompt.","cross_cats":["cs.CL","cs.GR","cs.LG"],"primary_cat":"cs.CV","authors_text":"Amir Hertz, Daniel Cohen-Or, Jay Tenenbaum, Kfir Aberman, Ron Mokady, Yael Pritch","submitted_at":"2022-08-02T17:55:41Z","abstract_excerpt":"Recent large-scale text-driven synthesis models have attracted much attention thanks to their remarkable capabilities of generating highly diverse images that follow given text prompts. Such text-based synthesis methods are particularly appealing to humans who are used to verbally describe their intent. Therefore, it is only natural to extend the text-driven image synthesis to text-driven image editing. Editing is challenging for these generative models, since an innate property of an editing technique is to preserve most of the original image, while in the text-based models, even a small modi"},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"the cross-attention layers are the key to controlling the relation between the spatial layout of the image to each word in the prompt. With this observation, we present several applications which monitor the image synthesis by editing the textual prompt only.","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"That the cross-attention mechanism is the dominant and controllable factor for spatial word-to-region mapping in the underlying generative model, and that targeted edits to these maps during inference will not introduce artifacts or require model retraining.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"Cross-attention control in text-conditioned models enables localized and global image edits by editing only the input text prompt.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"Cross-attention layers let users edit images by changing only the text prompt.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"5278ce2d287a8c6084715ee4e07d0153da2d976e1eb647faadedade980eb16ab"},"source":{"id":"2208.01626","kind":"arxiv","version":1},"verdict":{"id":"bd5b0419-1d51-48ee-8d01-ceacf5ed78c4","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-11T06:55:05.216979Z","strongest_claim":"the cross-attention layers are the key to controlling the relation between the spatial layout of the image to each word in the prompt. With this observation, we present several applications which monitor the image synthesis by editing the textual prompt only.","one_line_summary":"Cross-attention control in text-conditioned models enables localized and global image edits by editing only the input text prompt.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"That the cross-attention mechanism is the dominant and controllable factor for spatial word-to-region mapping in the underlying generative model, and that targeted edits to these maps during inference will not introduce artifacts or require model retraining.","pith_extraction_headline":"Cross-attention layers let users edit images by changing only the text prompt."},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2208.01626/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":51,"sample":[{"doi":"","year":2019,"title":"Image2stylegan: How to embed images into the stylegan latent space? In Proceedings of the IEEE/CVF International Conference on Computer Vision , pages 4432–4441","work_id":"cc4ef7d1-8138-4b50-ae5a-e077d9f3921a","ref_index":1,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2021,"title":"Clip2stylegan: Unsupervised extraction of stylegan edit directions","work_id":"0cabb8ec-5783-49a4-8115-48eda313c1ae","ref_index":2,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2022,"title":"Hyperstyle: Stylegan inversion with hypernetworks for real image editing","work_id":"b015cd3e-48ef-4c93-a1ea-ed5bd8974576","ref_index":3,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2022,"title":"arXiv preprint arXiv:2206.02779 , year=","work_id":"1aef642e-c5e8-42d8-a391-f41998245355","ref_index":4,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2022,"title":"Blended diffusion for text-driven editing of natural images","work_id":"029d1aa2-9897-4370-913c-479a41f99d39","ref_index":5,"cited_arxiv_id":"","is_internal_anchor":false}],"resolved_work":51,"snapshot_sha256":"66bb48d8324fb987bf77706f4ab378f373f64f9908ac388ab50d20ba21a74153","internal_anchors":6},"formal_canon":{"evidence_count":2,"snapshot_sha256":"76c60435d77a1c3ae210f9b2d673796afe8c2cfea8115d7a1c17025ea492afb8"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":"bd5b0419-1d51-48ee-8d01-ceacf5ed78c4"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-07-05T04:45:35Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"rCUJkGc52HLKfK+NF3i3KNZ8HJMK8wz8bTZUQX9zvIMk54ha1o8BN3bhcN9N64t4azVSXyNQ53Sc6uE81es4DA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-07-05T10:34:53.017423Z"},"content_sha256":"68f16ae6538407470314dc367d87156230c00192f69f5ddaf53b537964a0d460","schema_version":"1.0","event_id":"sha256:68f16ae6538407470314dc367d87156230c00192f69f5ddaf53b537964a0d460"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/AT7SNXJ2YUBDO47YGBKZXAWQHS/bundle.json","state_url":"https://pith.science/pith/AT7SNXJ2YUBDO47YGBKZXAWQHS/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/AT7SNXJ2YUBDO47YGBKZXAWQHS/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-07-05T10:34:53Z","links":{"resolver":"https://pith.science/pith/AT7SNXJ2YUBDO47YGBKZXAWQHS","bundle":"https://pith.science/pith/AT7SNXJ2YUBDO47YGBKZXAWQHS/bundle.json","state":"https://pith.science/pith/AT7SNXJ2YUBDO47YGBKZXAWQHS/state.json","well_known_bundle":"https://pith.science/.well-known/pith/AT7SNXJ2YUBDO47YGBKZXAWQHS/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2022:AT7SNXJ2YUBDO47YGBKZXAWQHS","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"21a051649f91096729f99e9a3accb638b7af913634eb3aa79930b28f7a40f2a6","cross_cats_sorted":["cs.CL","cs.GR","cs.LG"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2022-08-02T17:55:41Z","title_canon_sha256":"6f045d47f77f9d62c080c9273555e654308291b07760a0742e4f3abcf0504773"},"schema_version":"1.0","source":{"id":"2208.01626","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2208.01626","created_at":"2026-07-05T04:45:35Z"},{"alias_kind":"arxiv_version","alias_value":"2208.01626v1","created_at":"2026-07-05T04:45:35Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2208.01626","created_at":"2026-07-05T04:45:35Z"},{"alias_kind":"pith_short_12","alias_value":"AT7SNXJ2YUBD","created_at":"2026-07-05T04:45:35Z"},{"alias_kind":"pith_short_16","alias_value":"AT7SNXJ2YUBDO47Y","created_at":"2026-07-05T04:45:35Z"},{"alias_kind":"pith_short_8","alias_value":"AT7SNXJ2","created_at":"2026-07-05T04:45:35Z"}],"graph_snapshots":[{"event_id":"sha256:68f16ae6538407470314dc367d87156230c00192f69f5ddaf53b537964a0d460","target":"graph","created_at":"2026-07-05T04:45:35Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"the cross-attention layers are the key to controlling the relation between the spatial layout of the image to each word in the prompt. With this observation, we present several applications which monitor the image synthesis by editing the textual prompt only."},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"That the cross-attention mechanism is the dominant and controllable factor for spatial word-to-region mapping in the underlying generative model, and that targeted edits to these maps during inference will not introduce artifacts or require model retraining."},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"Cross-attention control in text-conditioned models enables localized and global image edits by editing only the input text prompt."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"Cross-attention layers let users edit images by changing only the text prompt."}],"snapshot_sha256":"5278ce2d287a8c6084715ee4e07d0153da2d976e1eb647faadedade980eb16ab"},"formal_canon":{"evidence_count":2,"snapshot_sha256":"76c60435d77a1c3ae210f9b2d673796afe8c2cfea8115d7a1c17025ea492afb8"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2208.01626/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Recent large-scale text-driven synthesis models have attracted much attention thanks to their remarkable capabilities of generating highly diverse images that follow given text prompts. Such text-based synthesis methods are particularly appealing to humans who are used to verbally describe their intent. Therefore, it is only natural to extend the text-driven image synthesis to text-driven image editing. Editing is challenging for these generative models, since an innate property of an editing technique is to preserve most of the original image, while in the text-based models, even a small modi","authors_text":"Amir Hertz, Daniel Cohen-Or, Jay Tenenbaum, Kfir Aberman, Ron Mokady, Yael Pritch","cross_cats":["cs.CL","cs.GR","cs.LG"],"headline":"Cross-attention layers let users edit images by changing only the text prompt.","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2022-08-02T17:55:41Z","title":"Prompt-to-Prompt Image Editing with Cross Attention Control"},"references":{"count":51,"internal_anchors":6,"resolved_work":51,"sample":[{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":1,"title":"Image2stylegan: How to embed images into the stylegan latent space? In Proceedings of the IEEE/CVF International Conference on Computer Vision , pages 4432–4441","work_id":"cc4ef7d1-8138-4b50-ae5a-e077d9f3921a","year":2019},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":2,"title":"Clip2stylegan: Unsupervised extraction of stylegan edit directions","work_id":"0cabb8ec-5783-49a4-8115-48eda313c1ae","year":2021},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":3,"title":"Hyperstyle: Stylegan inversion with hypernetworks for real image editing","work_id":"b015cd3e-48ef-4c93-a1ea-ed5bd8974576","year":2022},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":4,"title":"arXiv preprint arXiv:2206.02779 , year=","work_id":"1aef642e-c5e8-42d8-a391-f41998245355","year":2022},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":5,"title":"Blended diffusion for text-driven editing of natural images","work_id":"029d1aa2-9897-4370-913c-479a41f99d39","year":2022}],"snapshot_sha256":"66bb48d8324fb987bf77706f4ab378f373f64f9908ac388ab50d20ba21a74153"},"source":{"id":"2208.01626","kind":"arxiv","version":1},"verdict":{"created_at":"2026-05-11T06:55:05.216979Z","id":"bd5b0419-1d51-48ee-8d01-ceacf5ed78c4","model_set":{"reader":"grok-4.3"},"one_line_summary":"Cross-attention control in text-conditioned models enables localized and global image edits by editing only the input text prompt.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"Cross-attention layers let users edit images by changing only the text prompt.","strongest_claim":"the cross-attention layers are the key to controlling the relation between the spatial layout of the image to each word in the prompt. With this observation, we present several applications which monitor the image synthesis by editing the textual prompt only.","weakest_assumption":"That the cross-attention mechanism is the dominant and controllable factor for spatial word-to-region mapping in the underlying generative model, and that targeted edits to these maps during inference will not introduce artifacts or require model retraining."}},"verdict_id":"bd5b0419-1d51-48ee-8d01-ceacf5ed78c4"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:76275dee9d111842fd82af9c60ad7bad44e31f343ab63ff69301238aae9bb594","target":"record","created_at":"2026-07-05T04:45:35Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"21a051649f91096729f99e9a3accb638b7af913634eb3aa79930b28f7a40f2a6","cross_cats_sorted":["cs.CL","cs.GR","cs.LG"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2022-08-02T17:55:41Z","title_canon_sha256":"6f045d47f77f9d62c080c9273555e654308291b07760a0742e4f3abcf0504773"},"schema_version":"1.0","source":{"id":"2208.01626","kind":"arxiv","version":1}},"canonical_sha256":"04ff26dd3ac5023773f830559b82d03cbb9b046433b0bcf6f9402b0a74893087","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"04ff26dd3ac5023773f830559b82d03cbb9b046433b0bcf6f9402b0a74893087","first_computed_at":"2026-07-05T04:45:35.578343Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-07-05T04:45:35.578343Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"No2fgxCn43hGi4tOZuTqQt+hXWwvfL+sxRjaN4jgnmKfzSP785yPmSC8OWS6pB50TV97N6IvO9TcgfuulDWKCg==","signature_status":"signed_v1","signed_at":"2026-07-05T04:45:35.578727Z","signed_message":"canonical_sha256_bytes"},"source_id":"2208.01626","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:76275dee9d111842fd82af9c60ad7bad44e31f343ab63ff69301238aae9bb594","sha256:68f16ae6538407470314dc367d87156230c00192f69f5ddaf53b537964a0d460"],"state_sha256":"2ae7f1561d74b1e67a68c36d734ee0dab001abcfc6d978b58774823ec5d4247d"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"FTviZ6nNEJDeKSWWH0ehS0tZpshMXqiytJUSU2TTsfU8fhSslB+pCChGNB4eQlcMZJUlflEeujcp7n7X64QoBw==","signed_message":"bundle_sha256_bytes","signed_at":"2026-07-05T10:34:53.020932Z","bundle_sha256":"dc4e15a8a892900b6545326c6ec4d2fbf8dcde3b53f379974fdbf45d192916d9"}}