@@ -100,12 +100,12 @@ def load_checkpoint_with_lora(w: ComfyWorkflow, checkpoint: CheckpointInput, mod
         case (FileFormat.diffusion, Quantization.none):
             model = w.load_diffusion_model(model_info.filename)
         case (FileFormat.diffusion, Quantization.svdq):
-            if model_info.arch in (Arch.flux, Arch.flux_k):
+            if model_info.arch.is_flux_like:
                 cache = 0.12 if checkpoint.dynamic_caching else 0.0
                 model = w.nunchaku_load_flux_diffusion_model(
                     model_info.filename, cache_threshold=cache
                 )
-            elif model_info.arch in (Arch.qwen, Arch.qwen_e):
+            elif model_info.arch.is_qwen_like:
                 # WIP #2072 replace by customizable parameters
                 model = w.nunchaku_load_qwen_diffusion_model(
                     model_info.filename,
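The two `-`/`+` pairs above swap open-coded tuple membership tests for properties on the `Arch` enum. The enum itself is not part of this diff; the following is a minimal sketch of what those helpers might look like, assuming plain `enum.Enum` members (the member names come from the old checks, while the values and the exact `is_qwen_like` membership are assumptions):

```python
# Hypothetical sketch of the Arch helpers used above; the real enum lives
# elsewhere in the repository and may differ.
from enum import Enum

class Arch(Enum):
    flux = "flux"
    flux_k = "flux_k"      # Flux variant, taken from the old tuple check
    qwen = "qwen"
    qwen_e = "qwen_e"      # Qwen Image Edit
    qwen_e_p = "qwen_e_p"  # Qwen Image Edit Plus, added in this change

    @property
    def is_flux_like(self) -> bool:
        # Replaces `arch in (Arch.flux, Arch.flux_k)` at call sites.
        return self in (Arch.flux, Arch.flux_k)

    @property
    def is_qwen_like(self) -> bool:
        # Replaces `arch in (Arch.qwen, Arch.qwen_e)`; presumably it now
        # also covers the new qwen_e_p member.
        return self in (Arch.qwen, Arch.qwen_e, Arch.qwen_e_p)
```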
@@ -139,7 +139,7 @@ def load_checkpoint_with_lora(w: ComfyWorkflow, checkpoint: CheckpointInput, mod
         case Arch.chroma:
             clip = w.load_clip(te["t5"], type="chroma")
             clip = w.t5_tokenizer_options(clip, min_padding=1, min_length=0)
-        case Arch.qwen | Arch.qwen_e:
+        case Arch.qwen | Arch.qwen_e | Arch.qwen_e_p:
             clip = w.load_clip(te["qwen"], type="qwen_image")
         case _:
             raise RuntimeError(f"No text encoder for model architecture {arch.name}")
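The new `qwen_e_p` architecture reuses the existing `qwen_image` text encoder; only the conditioning step further down changes. A quick sanity check against the `Arch` sketch above:

```python
# All three Qwen variants should land in the same match arm and loader.
assert all(a.is_qwen_like for a in (Arch.qwen, Arch.qwen_e, Arch.qwen_e_p))
```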
@@ -653,18 +653,28 @@ def apply_edit_conditioning(

     extra_input = [c.image for c in control_layers if c.mode.is_ip_adapter]
     if len(extra_input) == 0:
-        if arch == Arch.qwen_e:
+        if arch == Arch.qwen_e_p:
+            return w.text_encode_qwen_image_edit_plus(clip, vae, [input_image], positive)
+        elif arch == Arch.qwen_e:
             # Don't use VAE to force the reference latent
             cond = w.text_encode_qwen_image_edit(clip, None, input_image, positive)
             return w.reference_latent(cond, input_latent)

-    input = w.image_stitch([input_image] + [i.load(w) for i in extra_input])
-    latent = vae_encode(w, vae, input, tiled_vae)
-    if arch == Arch.qwen_e:
-        # Don't use VAE to force the reference latent
-        cond = w.text_encode_qwen_image_edit(clip, None, input, positive)
-        cond = w.reference_latent(cond, latent)
-        return cond
+    if arch == Arch.qwen_e_p:
+        return w.text_encode_qwen_image_edit_plus(
+            clip,
+            vae,
+            [input_image] + [i.load(w) for i in extra_input],
+            positive,
+        )
+    else:
+        input = w.image_stitch([input_image] + [i.load(w) for i in extra_input])
+        latent = vae_encode(w, vae, input, tiled_vae)
+        if arch == Arch.qwen_e:
+            # Don't use VAE to force the reference latent
+            cond = w.text_encode_qwen_image_edit(clip, None, input, positive)
+            cond = w.reference_latent(cond, latent)
+            return cond


 def scale(
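The restructured `apply_edit_conditioning` keeps the two Qwen edit flows separate: plain Qwen Edit stitches all reference images into one canvas, encodes it, and attaches the reference latent by hand, while Edit Plus receives the images as a list and uses the VAE itself. A condensed sketch of just that split, assuming the `w.*` helpers and variables from the diff (non-Qwen architectures, which fall through without a return, are omitted):

```python
# Condensed from apply_edit_conditioning above; `w`, `clip`, `vae`, `arch`,
# `input_image`, `extra_input`, `positive`, and `tiled_vae` are assumed to
# be in scope as in the surrounding function. Qwen cases only.
refs = [input_image] + [i.load(w) for i in extra_input]

if arch == Arch.qwen_e_p:
    # Edit Plus accepts a list of reference images and runs the VAE itself.
    cond = w.text_encode_qwen_image_edit_plus(clip, vae, refs, positive)
elif arch == Arch.qwen_e:
    # Plain Edit sees one stitched canvas; pass no VAE so the reference
    # latent can be forced explicitly afterwards.
    stitched = w.image_stitch(refs)
    latent = vae_encode(w, vae, stitched, tiled_vae)
    cond = w.text_encode_qwen_image_edit(clip, None, stitched, positive)
    cond = w.reference_latent(cond, latent)
```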