import numpy as np
from .attention_processor import IPAFluxAttnProcessor2_0
from .utils import is_model_pathched, FluxUpdateModules
+from .sd3.resampler import TimeResampler
+from .sd3.joinblock import JointBlockIPWrapper, IPAttnProcessor

image_proj_model = None
class MLPProjModel(torch.nn.Module):
@@ -95,7 +97,7 @@ def get_image_embeds(self, pil_image=None, clip_image_embeds=None):
        image_prompt_embeds = image_proj_model(clip_image_embeds)
        return image_prompt_embeds

-    def apply_ipadapter_flux(self, model, ipadapter, image, weight, start_at, end_at, provider=None, use_tiled=False):
+    def apply_ipadapter(self, model, ipadapter, image, weight, start_at, end_at, provider=None, use_tiled=False):
        self.device = provider.lower()
        if "clipvision" in ipadapter:
            # self.clip_vision = ipadapter["clipvision"]['model']
@@ -127,3 +129,140 @@ def apply_ipadapter_flux(self, model, ipadapter, image, weight, start_at, end_at

        return (bi, image)

+
+def patch_sd3(
+    patcher,
+    ip_procs,
+    resampler: TimeResampler,
+    clip_embeds,
+    weight=1.0,
+    start=0.0,
+    end=1.0,
+):
+    """
+    Patches a model_sampler to add the ipadapter
+    """
+    mmdit = patcher.model.diffusion_model
+    timestep_schedule_max = patcher.model.model_config.sampling_settings.get(
+        "timesteps", 1000
+    )
+    # hook the model's forward function
+    # so that when it gets called, we can grab the timestep and send it to the resampler
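+    # ip_options is shared by reference with every JointBlockIPWrapper created below,
+    # so values written by the hook are visible to all of the attention processors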
+    ip_options = {
+        "hidden_states": None,
+        "t_emb": None,
+        "weight": weight,
+    }
+
+    def ddit_wrapper(forward, args):
+        # this is between 0 and 1, so the adapters can calculate start_point and end_point
+        # actually, do we need to get the sigma value instead?
+        t_percent = 1 - args["timestep"].flatten()[0].cpu().item()
+        if start <= t_percent <= end:
+            batch_size = args["input"].shape[0] // len(args["cond_or_uncond"])
+            # if we're only doing cond or only doing uncond, only pass one of them through the resampler
+            embeds = clip_embeds[args["cond_or_uncond"]]
+            # slight efficiency optimization todo: pass the embeds through and then afterwards
+            # repeat to the batch size
+            embeds = torch.repeat_interleave(embeds, batch_size, dim=0)
+            # the resampler wants between 0 and MAX_STEPS
+            timestep = args["timestep"] * timestep_schedule_max
+            image_emb, t_emb = resampler(embeds, timestep, need_temb=True)
+            # these will need to be accessible to the IPAdapters
+            ip_options["hidden_states"] = image_emb
+            ip_options["t_emb"] = t_emb
+        else:
+            ip_options["hidden_states"] = None
+            ip_options["t_emb"] = None
+
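+        # run the original forward pass unchanged; the wrapped joint blocks read ip_options during this call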
+        return forward(args["input"], args["timestep"], **args["c"])
+
+    patcher.set_model_unet_function_wrapper(ddit_wrapper)
+    # patch each dit block
+    for i, block in enumerate(mmdit.joint_blocks):
+        wrapper = JointBlockIPWrapper(block, ip_procs[i], ip_options)
+        patcher.set_model_patch_replace(wrapper, "dit", "double_block", i)
+
+class InstantXSD3IpadapterApply:
+    def __init__(self):
+        self.device = None
+        self.dtype = torch.float16
+        self.clip_image_processor = None
+        self.image_encoder = None
+        self.resampler = None
+        self.procs = None
+
+    @torch.inference_mode()
+    def encode(self, image):
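+        # ComfyUI images are float tensors in [0, 1], so the processor's rescaling is disabled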
+        clip_image = self.clip_image_processor.image_processor(image, return_tensors="pt", do_rescale=False).pixel_values
+        clip_image_embeds = self.image_encoder(
+            clip_image.to(self.device, dtype=self.image_encoder.dtype),
+            output_hidden_states=True,
+        ).hidden_states[-2]
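+        # append a zero embedding to serve as the unconditional (negative) image prompt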
+        clip_image_embeds = torch.cat(
+            [clip_image_embeds, torch.zeros_like(clip_image_embeds)], dim=0
+        )
+        clip_image_embeds = clip_image_embeds.to(dtype=torch.float16)
+        return clip_image_embeds
+
+    def apply_ipadapter(self, model, ipadapter, image, weight, start_at, end_at, provider=None, use_tiled=False):
+        self.device = provider.lower()
+        if "clipvision" in ipadapter:
+            self.image_encoder = ipadapter["clipvision"]['model']['image_encoder'].to(self.device, dtype=self.dtype)
+            self.clip_image_processor = ipadapter["clipvision"]['model']['clip_image_processor']
+        if "ipadapter" in ipadapter:
+            self.ip_ckpt = ipadapter["ipadapter"]['file']
+            self.state_dict = ipadapter["ipadapter"]['model']
+
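+        # image projector: resamples the CLIP hidden states (plus the timestep) into 64 image tokens;
+        # these dims must match the checkpoint's image_proj weights loaded below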
+        self.resampler = TimeResampler(
+            dim=1280,
+            depth=4,
+            dim_head=64,
+            heads=20,
+            num_queries=64,
+            embedding_dim=1152,
+            output_dim=2432,
+            ff_mult=4,
+            timestep_in_dim=320,
+            timestep_flip_sin_to_cos=True,
+            timestep_freq_shift=0,
+        )
+        self.resampler.eval()
+        self.resampler.to(self.device, dtype=self.dtype)
+        self.resampler.load_state_dict(self.state_dict["image_proj"])
+
+        # now we'll create the attention processors
+        # ip_adapter.keys looks like [0.proj, 0.to_k, ..., 1.proj, 1.to_k, ...]
+        n_procs = len(
+            set(x.split(".")[0] for x in self.state_dict["ip_adapter"].keys())
+        )
+        self.procs = torch.nn.ModuleList(
+            [
+                # this is hardcoded for SD3.5L
+                IPAttnProcessor(
+                    hidden_size=2432,
+                    cross_attention_dim=2432,
+                    ip_hidden_states_dim=2432,
+                    ip_encoder_hidden_states_dim=2432,
+                    head_dim=64,
+                    timesteps_emb_dim=1280,
+                ).to(self.device, dtype=torch.float16)
+                for _ in range(n_procs)
+            ]
+        )
+        self.procs.load_state_dict(self.state_dict["ip_adapter"])
+
+        work_model = model.clone()
+        embeds = self.encode(image)
+
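+        # install the timestep hook and the per-block attention wrappers on the cloned model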
+        patch_sd3(
+            work_model,
+            self.procs,
+            self.resampler,
+            embeds,
+            weight,
+            start_at,
+            end_at,
+        )
+
+        return (work_model, image)