@@ -654,24 +654,28 @@ def apply_edit_conditioning(
654654 extra_input = [c .image for c in control_layers if c .mode .is_ip_adapter ]
655655 if len (extra_input ) == 0 :
656656 if arch == Arch .qwen_e_p :
657- return w .text_encode_qwen_image_edit_plus (clip , vae , [input_image ], positive )
657+ cond = w .text_encode_qwen_image_edit_plus (clip , None , [input_image ], positive )
658658 elif arch == Arch .qwen_e :
659- # Don't use VAE to force the reference latent
660659 cond = w .text_encode_qwen_image_edit (clip , None , input_image , positive )
661660 return w .reference_latent (cond , input_latent )
662661
663662 if arch == Arch .qwen_e_p :
664- return w .text_encode_qwen_image_edit_plus (
663+ extra_images = [i .load (w ) for i in extra_input ]
664+ cond = w .text_encode_qwen_image_edit_plus (
665665 clip ,
666- vae ,
667- [input_image ] + [ i . load ( w ) for i in extra_input ] ,
666+ None ,
667+ [input_image ] + extra_images ,
668668 positive ,
669669 )
670+ cond = w .reference_latent (cond , input_latent )
671+ for extra_image in extra_images :
672+ latent = vae_encode (w , vae , extra_image , tiled_vae )
673+ cond = w .reference_latent (cond , latent )
674+ return cond
670675 else :
671676 input = w .image_stitch ([input_image ] + [i .load (w ) for i in extra_input ])
672677 latent = vae_encode (w , vae , input , tiled_vae )
673678 if arch == Arch .qwen_e :
674- # Don't use VAE to force the reference latent
675679 cond = w .text_encode_qwen_image_edit (clip , None , input , positive )
676680 cond = w .reference_latent (cond , latent )
677681 return cond
0 commit comments