Skip to content

Commit 14064f6

Browse files
committed
Added support for Qwen-Image-Edit-Plus, added Arch.qwen_e_p
1 parent 231c239 commit 14064f6

File tree

7 files changed: +62 −23 lines

ai_diffusion/comfy_workflow.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -606,6 +606,24 @@ def text_encode_qwen_image_edit(
606606
"TextEncodeQwenImageEdit", 1, clip=clip, vae=vae, image=image, prompt=prompt
607607
)
608608

609+
def text_encode_qwen_image_edit_plus(
610+
self, clip: Output, vae: Output | None, images: list[Output], prompt: str | Output
611+
):
612+
image1 = images[0] if len(images) > 0 else None
613+
image2 = images[1] if len(images) > 1 else None
614+
image3 = images[2] if len(images) > 2 else None
615+
616+
return self.add(
617+
"TextEncodeQwenImageEditPlus",
618+
1,
619+
clip=clip,
620+
vae=vae,
621+
image1=image1,
622+
image2=image2,
623+
image3=image3,
624+
prompt=prompt,
625+
)
626+
609627
def background_region(self, conditioning: Output):
610628
return self.add("ETN_BackgroundRegion", 1, conditioning=conditioning)
611629

ai_diffusion/resolution.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,8 @@ def compute(extent: Extent, arch: Arch, style: Style | None = None):
152152
Arch.sd3: (512, 1536, 512**2, 1536**2),
153153
Arch.flux: (256, 2048, 512**2, 2048**2),
154154
Arch.qwen: (256, 2048, 512**2, 2048**2),
155+
Arch.qwen_e: (256, 2048, 512**2, 2048**2),
156+
Arch.qwen_e_p: (256, 2048, 512**2, 2048**2),
155157
}[arch]
156158
else:
157159
range_offset = multiple_of(round(0.2 * style.preferred_resolution), 8)

ai_diffusion/resources.py

Lines changed: 15 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,7 @@ class Arch(Enum):
8585
chroma = "Chroma"
8686
qwen = "Qwen"
8787
qwen_e = "Qwen Edit"
88+
qwen_e_p = "Qwen Edit Plus"
8889

8990
auto = "Automatic"
9091
all = "All"
@@ -109,13 +110,12 @@ def from_string(string: str, model_type: str = "eps", filename: str | None = Non
109110
return Arch.illu_v
110111
if string == "chroma":
111112
return Arch.chroma
112-
if (
113-
string in ("qwen", "qwen_image", "qwen-image")
114-
and filename
115-
and "edit" in filename.lower()
116-
):
117-
return Arch.qwen_e
118-
if string in ("qwen", "qwen_image", "qwen-image"):
113+
if string == "qwen-image" and filename and "edit" in filename.lower():
114+
if "2509" in filename.lower():
115+
return Arch.qwen_e_p
116+
else:
117+
return Arch.qwen_e
118+
if string == "qwen-image":
119119
return Arch.qwen
120120
return None
121121

@@ -167,7 +167,7 @@ def supports_cfg(self):
167167

168168
@property
169169
def is_edit(self): # edit models make changes to input images
170-
return self in [Arch.flux_k, Arch.qwen_e]
170+
return self in [Arch.flux_k, Arch.qwen_e, Arch.qwen_e_p]
171171

172172
@property
173173
def is_sdxl_like(self):
@@ -178,6 +178,10 @@ def is_sdxl_like(self):
178178
def is_flux_like(self):
179179
return self in [Arch.flux, Arch.flux_k]
180180

181+
@property
182+
def is_qwen_like(self):
183+
return self in [Arch.qwen, Arch.qwen_e, Arch.qwen_e_p]
184+
181185
@property
182186
def text_encoders(self):
183187
match self:
@@ -191,7 +195,7 @@ def text_encoders(self):
191195
return ["clip_l", "t5"]
192196
case Arch.chroma:
193197
return ["t5"]
194-
case Arch.qwen | Arch.qwen_e:
198+
case Arch.qwen | Arch.qwen_e | Arch.qwen_e_p:
195199
return ["qwen"]
196200
raise ValueError(f"Unsupported architecture: {self}")
197201

@@ -208,6 +212,7 @@ def list():
208212
Arch.chroma,
209213
Arch.qwen,
210214
Arch.qwen_e,
215+
Arch.qwen_e_p,
211216
]
212217

213218

@@ -714,6 +719,7 @@ def is_required(kind: ResourceKind, arch: Arch, identifier: ControlMode | Upscal
714719
resource_id(ResourceKind.vae, Arch.chroma, "default"): ["flux", "ae.s"],
715720
resource_id(ResourceKind.vae, Arch.qwen, "default"): ["qwen"],
716721
resource_id(ResourceKind.vae, Arch.qwen_e, "default"): ["qwen"],
722+
resource_id(ResourceKind.vae, Arch.qwen_e_p, "default"): ["qwen"],
717723
}
718724
# fmt: on
719725

ai_diffusion/ui/server.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -202,6 +202,7 @@ def _workload_matches(self, item: PackageItem):
202202
Arch.chroma,
203203
Arch.qwen,
204204
Arch.qwen_e,
205+
Arch.qwen_e_p,
205206
]
206207
)
207208

ai_diffusion/ui/style.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -765,8 +765,8 @@ def _change_style(self):
765765
self._read_style(self.current_style)
766766

767767
def _open_checkpoints_folder(self):
768-
arch = arch = resolve_arch(self.current_style, root.connection.client_if_connected)
769-
if arch.is_flux_like or arch in (Arch.chroma, Arch.qwen, Arch.qwen_e):
768+
arch = resolve_arch(self.current_style, root.connection.client_if_connected)
769+
if arch.is_flux_like or arch == Arch.chroma or arch.is_qwen_like:
770770
self._open_folder(Path("models/diffusion_models"))
771771
else:
772772
self._open_folder(Path("models/checkpoints"))
@@ -883,6 +883,8 @@ def _enable_checkpoint_advanced(self):
883883
valid_archs = (Arch.auto, Arch.sdxl, Arch.illu, Arch.illu_v)
884884
elif arch.is_flux_like:
885885
valid_archs = (Arch.auto, Arch.flux, Arch.flux_k)
886+
elif arch.is_qwen_like:
887+
valid_archs = (Arch.auto, Arch.qwen, Arch.qwen_e, Arch.qwen_e_p)
886888
else:
887889
valid_archs = (Arch.auto, arch)
888890
with SignalBlocker(self._arch_select):

ai_diffusion/ui/theme.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ def checkpoint_icon(arch: Arch, format: FileFormat | None = None, client: Client
6868
return icon("sd-version-chroma")
6969
elif arch is Arch.qwen:
7070
return icon("sd-version-qwen")
71-
elif arch is Arch.qwen_e:
71+
elif arch in (Arch.qwen_e, Arch.qwen_e_p):
7272
return icon("sd-version-qwen-e")
7373
else:
7474
log.warning(f"Unresolved SD version {arch}, cannot fetch icon")

ai_diffusion/workflow.py

Lines changed: 21 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -100,12 +100,12 @@ def load_checkpoint_with_lora(w: ComfyWorkflow, checkpoint: CheckpointInput, mod
100100
case (FileFormat.diffusion, Quantization.none):
101101
model = w.load_diffusion_model(model_info.filename)
102102
case (FileFormat.diffusion, Quantization.svdq):
103-
if model_info.arch in (Arch.flux, Arch.flux_k):
103+
if model_info.arch.is_flux_like:
104104
cache = 0.12 if checkpoint.dynamic_caching else 0.0
105105
model = w.nunchaku_load_flux_diffusion_model(
106106
model_info.filename, cache_threshold=cache
107107
)
108-
elif model_info.arch in (Arch.qwen, Arch.qwen_e):
108+
elif model_info.arch.is_qwen_like:
109109
# WIP #2072 replace by customizable parameters
110110
model = w.nunchaku_load_qwen_diffusion_model(
111111
model_info.filename,
@@ -139,7 +139,7 @@ def load_checkpoint_with_lora(w: ComfyWorkflow, checkpoint: CheckpointInput, mod
139139
case Arch.chroma:
140140
clip = w.load_clip(te["t5"], type="chroma")
141141
clip = w.t5_tokenizer_options(clip, min_padding=1, min_length=0)
142-
case Arch.qwen | Arch.qwen_e:
142+
case Arch.qwen | Arch.qwen_e | Arch.qwen_e_p:
143143
clip = w.load_clip(te["qwen"], type="qwen_image")
144144
case _:
145145
raise RuntimeError(f"No text encoder for model architecture {arch.name}")
@@ -653,18 +653,28 @@ def apply_edit_conditioning(
653653

654654
extra_input = [c.image for c in control_layers if c.mode.is_ip_adapter]
655655
if len(extra_input) == 0:
656-
if arch == Arch.qwen_e:
656+
if arch == Arch.qwen_e_p:
657+
return w.text_encode_qwen_image_edit_plus(clip, vae, [input_image], positive)
658+
elif arch == Arch.qwen_e:
657659
# Don't use VAE to force the reference latent
658660
cond = w.text_encode_qwen_image_edit(clip, None, input_image, positive)
659661
return w.reference_latent(cond, input_latent)
660662

661-
input = w.image_stitch([input_image] + [i.load(w) for i in extra_input])
662-
latent = vae_encode(w, vae, input, tiled_vae)
663-
if arch == Arch.qwen_e:
664-
# Don't use VAE to force the reference latent
665-
cond = w.text_encode_qwen_image_edit(clip, None, input, positive)
666-
cond = w.reference_latent(cond, latent)
667-
return cond
663+
if arch == Arch.qwen_e_p:
664+
return w.text_encode_qwen_image_edit_plus(
665+
clip,
666+
vae,
667+
[input_image] + [i.load(w) for i in extra_input],
668+
positive,
669+
)
670+
else:
671+
input = w.image_stitch([input_image] + [i.load(w) for i in extra_input])
672+
latent = vae_encode(w, vae, input, tiled_vae)
673+
if arch == Arch.qwen_e:
674+
# Don't use VAE to force the reference latent
675+
cond = w.text_encode_qwen_image_edit(clip, None, input, positive)
676+
cond = w.reference_latent(cond, latent)
677+
return cond
668678

669679

670680
def scale(

0 commit comments

Comments (0)