Skip to content

Commit 99fa539

Browse files
committed
Added support for Qwen-Image-Edit with TextEncodeQwenImageEdit node
1 parent 1d53e2f commit 99fa539

File tree

2 files changed

+14
-3
lines changed

2 files changed

+14
-3
lines changed

ai_diffusion/comfy_workflow.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -597,6 +597,9 @@ def instruct_pix_to_pix_conditioning(
597597
def reference_latent(self, conditioning: Output, latent: Output):
598598
return self.add("ReferenceLatent", 1, conditioning=conditioning, latent=latent)
599599

600+
def text_encode_qwen_image_edit(self, clip: Output, vae: Output | None, image: Output, prompt: str | Output):
601+
return self.add("TextEncodeQwenImageEdit", 1, clip=clip, vae=vae, image=image, prompt=prompt)
602+
600603
def background_region(self, conditioning: Output):
601604
return self.add("ETN_BackgroundRegion", 1, conditioning=conditioning)
602605

ai_diffusion/workflow.py

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -635,6 +635,8 @@ def apply_edit_conditioning(
635635
input_latent: Output,
636636
control_layers: list[Control],
637637
vae: Output,
638+
clip: Output,
639+
positive: str,
638640
arch: Arch,
639641
tiled_vae: bool,
640642
):
@@ -643,10 +645,16 @@ def apply_edit_conditioning(
643645

644646
extra_input = [c.image for c in control_layers if c.mode.is_ip_adapter]
645647
if len(extra_input) == 0:
648+
if arch == Arch.qwen_e:
649+
# Pass None for the VAE so the node does not encode the image itself; the reference latent is attached explicitly below
650+
cond = w.text_encode_qwen_image_edit(clip, None, input_image, positive)
646651
return w.reference_latent(cond, input_latent)
647652

648653
input = w.image_stitch([input_image] + [i.load(w) for i in extra_input])
649654
latent = vae_encode(w, vae, input, tiled_vae)
655+
if arch == Arch.qwen_e:
656+
# Pass None for the VAE so the node does not encode the image itself; the reference latent is attached explicitly below
657+
cond = w.text_encode_qwen_image_edit(clip, None, input, positive)
650658
cond = w.reference_latent(cond, latent)
651659
return cond
652660

@@ -734,7 +742,7 @@ def scale_refine_and_decode(
734742
model, positive, negative = apply_control(
735743
w, model, positive, negative, cond.all_control, extent.desired, vae, models
736744
)
737-
positive = apply_edit_conditioning(w, positive, upscale, latent, [], vae, arch, tiled_vae)
745+
positive = apply_edit_conditioning(w, positive, upscale, latent, [], vae, clip.model, cond.positive.text, arch, tiled_vae)
738746
result = w.sampler_custom_advanced(model, positive, negative, latent, arch, **params)
739747
image = vae_decode(w, vae, result, tiled_vae)
740748
return image
@@ -1015,7 +1023,7 @@ def refine(
10151023
w, model, positive, negative, cond.all_control, extent.desired, vae, models
10161024
)
10171025
positive = apply_edit_conditioning(
1018-
w, positive, in_image, latent, cond.all_control, vae, models.arch, checkpoint.tiled_vae
1026+
w, positive, in_image, latent, cond.all_control, vae, clip.model, cond.positive.text, models.arch, checkpoint.tiled_vae
10191027
)
10201028
sampler = w.sampler_custom_advanced(
10211029
model, positive, negative, latent_batch, models.arch, **_sampler_params(sampling)
@@ -1067,7 +1075,7 @@ def refine_region(
10671075
else:
10681076
latent = vae_encode(w, vae, in_image, checkpoint.tiled_vae)
10691077
positive = apply_edit_conditioning(
1070-
w, positive, in_image, latent, cond.all_control, vae, models.arch, checkpoint.tiled_vae
1078+
w, positive, in_image, latent, cond.all_control, vae, clip.model, cond.positive.text, models.arch, checkpoint.tiled_vae
10711079
)
10721080
latent = w.set_latent_noise_mask(latent, initial_mask)
10731081
inpaint_model = model

0 commit comments

Comments
 (0)