Skip to content

Commit 4f1b6ce

Browse files
feat: add new layer type for diffusers-ada-ln
1 parent c12005e commit 4f1b6ce

File tree

3 files changed

+27
-10
lines changed

3 files changed

+27
-10
lines changed
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
import torch
2+
3+
from invokeai.backend.patches.layers.lora_layer import LoRALayer
4+
5+
class DiffusersAdaLN_LoRALayer(LoRALayer):
    """LoRA layer converted from a Diffusers AdaLN layer.

    The patched weight is identical to a plain LoRALayer's, except that the
    two halves of dim 0 (scale/shift) are swapped to match the layout
    expected by the SD3/Flux AdaLayerNormContinuous implementation.
    """

    def get_weight(self, orig_weight: torch.Tensor) -> torch.Tensor:
        """Return the LoRA weight with its shift/scale halves swapped.

        The SD3/Flux implementation of AdaLayerNormContinuous splits the
        linear projection output into (shift, scale), while Diffusers splits
        it into (scale, shift). Swapping the two halves of the projection
        weight lets the Flux implementation consume a Diffusers-trained LoRA.
        """
        converted = super().get_weight(orig_weight)
        # chunk(2, dim=0) yields (scale, shift); reverse the pair before
        # concatenating to obtain the (shift, scale) ordering Flux expects.
        halves = converted.chunk(2, dim=0)
        return torch.cat(halves[::-1], dim=0)

invokeai/backend/patches/layers/utils.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
from invokeai.backend.patches.layers.lokr_layer import LoKRLayer
1111
from invokeai.backend.patches.layers.lora_layer import LoRALayer
1212
from invokeai.backend.patches.layers.norm_layer import NormLayer
13+
from invokeai.backend.patches.layers.diffusers_ada_ln_lora_layer import DiffusersAdaLN_LoRALayer
1314

1415

1516
def any_lora_layer_from_state_dict(state_dict: Dict[str, torch.Tensor]) -> BaseLayerPatch:
@@ -33,3 +34,10 @@ def any_lora_layer_from_state_dict(state_dict: Dict[str, torch.Tensor]) -> BaseL
3334
return NormLayer.from_state_dict_values(state_dict)
3435
else:
3536
raise ValueError(f"Unsupported lora format: {state_dict.keys()}")
37+
38+
39+
def diffusers_adaLN_lora_layer_from_state_dict(state_dict: Dict[str, torch.Tensor]) -> DiffusersAdaLN_LoRALayer:
    """Build a DiffusersAdaLN_LoRALayer from a Diffusers-format LoRA state dict.

    Args:
        state_dict: Tensors for a single layer; must contain "lora_up.weight"
            (the kohya-style key this converter recognizes).

    Returns:
        A DiffusersAdaLN_LoRALayer that swaps the shift/scale halves of the
        patched weight (see that class for why).

    Raises:
        ValueError: If the state dict does not look like a supported LoRA
            layer (missing "lora_up.weight").
    """
    # PEP 8 idiom: `x not in d`, not `not x in d`.
    if "lora_up.weight" not in state_dict:
        raise ValueError(f"Unsupported lora format: {state_dict.keys()}")

    return DiffusersAdaLN_LoRALayer.from_state_dict_values(state_dict)

invokeai/backend/patches/lora_conversions/flux_diffusers_lora_conversion_utils.py

Lines changed: 3 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
from invokeai.backend.patches.layers.base_layer_patch import BaseLayerPatch
66
from invokeai.backend.patches.layers.merged_layer_patch import MergedLayerPatch, Range
7-
from invokeai.backend.patches.layers.utils import any_lora_layer_from_state_dict
7+
from invokeai.backend.patches.layers.utils import any_lora_layer_from_state_dict, diffusers_adaLN_lora_layer_from_state_dict
88
from invokeai.backend.patches.lora_conversions.flux_lora_constants import FLUX_LORA_TRANSFORMER_PREFIX
99
from invokeai.backend.patches.model_patch_raw import ModelPatchRaw
1010

@@ -86,15 +86,8 @@ def add_adaLN_lora_layer_if_present(src_key: str, dst_key: str) -> None:
8686
if src_key in grouped_state_dict:
8787
src_layer_dict = grouped_state_dict.pop(src_key)
8888
values = get_lora_layer_values(src_layer_dict)
89-
90-
for _key in values.keys():
91-
# in SD3 original implementation of AdaLayerNormContinuous, it split linear projection output into shift, scale;
92-
# while in diffusers it split into scale, shift. Here we swap the linear projection weights in order to be able to use diffusers implementation
93-
scale, shift = values[_key].chunk(2, dim=0)
94-
values[_key] = torch.cat([shift, scale], dim=0)
95-
96-
layers[dst_key] = any_lora_layer_from_state_dict(values)
97-
89+
layers[dst_key] = diffusers_adaLN_lora_layer_from_state_dict(values)
90+
9891
def add_qkv_lora_layer_if_present(
9992
src_keys: list[str],
10093
src_weight_shapes: list[tuple[int, int]],

0 commit comments

Comments
 (0)