@@ -82,6 +82,19 @@ def add_lora_layer_if_present(src_key: str, dst_key: str) -> None:
             values = get_lora_layer_values(src_layer_dict)
             layers[dst_key] = any_lora_layer_from_state_dict(values)
 
+    def add_lora_adaLN_layer_if_present(src_key: str, dst_key: str) -> None:
+        if src_key in grouped_state_dict:
+            src_layer_dict = grouped_state_dict.pop(src_key)
+            values = get_lora_layer_values(src_layer_dict)
+
+            for _key in values.keys():
+                # The original SD3 implementation of AdaLayerNormContinuous splits the linear projection output into
+                # (shift, scale), while diffusers splits it into (scale, shift). Swap the two halves of the linear projection weights so that the diffusers implementation can be used.
+                scale, shift = values[_key].chunk(2, dim=0)
+                values[_key] = torch.cat([shift, scale], dim=0)
+
+            layers[dst_key] = any_lora_layer_from_state_dict(values)
+
     def add_qkv_lora_layer_if_present(
         src_keys: list[str],
         src_weight_shapes: list[tuple[int, int]],
@@ -223,6 +236,10 @@ def add_qkv_lora_layer_if_present(
 
     # Final layer.
     add_lora_layer_if_present("proj_out", "final_layer.linear")
+    add_lora_adaLN_layer_if_present(
+        "norm_out.linear",
+        "final_layer.adaLN_modulation.1",
+    )
 
     # Assert that all keys were processed.
     assert len(grouped_state_dict) == 0
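
As a sanity check on the shift/scale swap in `add_lora_adaLN_layer_if_present`, here is a minimal standalone sketch (the `proj` tensor is a hypothetical stand-in, not an actual model weight) showing that concatenating the two halves in reverse order turns a (scale, shift) layout into a (shift, scale) layout:

```python
import torch

# Hypothetical stand-in for an adaLN linear projection weight along dim 0:
# in the diffusers layout, the first half is scale and the second half is shift.
proj = torch.arange(8.0)

scale, shift = proj.chunk(2, dim=0)  # diffusers layout: (scale, shift)

# Swap the halves, as in add_lora_adaLN_layer_if_present above.
swapped = torch.cat([shift, scale], dim=0)

# Reading the swapped tensor in the (shift, scale) layout recovers the same halves.
shift2, scale2 = swapped.chunk(2, dim=0)
assert torch.equal(shift2, shift)
assert torch.equal(scale2, scale)
```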