
Commit 9372647

improvements
1 parent 3d84b9e commit 9372647

2 files changed: +42 -31 lines changed

src/diffusers/hooks/layerwise_upcasting.py

Lines changed: 38 additions & 26 deletions
@@ -13,7 +13,7 @@
 # limitations under the License.
 
 import re
-from typing import List, Type
+from typing import Optional, Tuple, Type
 
 import torch
 
@@ -25,13 +25,13 @@
 
 
 # fmt: off
-_SUPPORTED_PYTORCH_LAYERS = [
+_SUPPORTED_PYTORCH_LAYERS = (
     torch.nn.Conv1d, torch.nn.Conv2d, torch.nn.Conv3d,
     torch.nn.ConvTranspose1d, torch.nn.ConvTranspose2d, torch.nn.ConvTranspose3d,
     torch.nn.Linear,
-]
+)
 
-_DEFAULT_SKIP_MODULES_PATTERN = ["pos_embed", "patch_embed", "norm"]
+_DEFAULT_SKIP_MODULES_PATTERN = ("pos_embed", "patch_embed", "norm")
 # fmt: on
 
 
@@ -66,10 +66,11 @@ def apply_layerwise_upcasting(
     module: torch.nn.Module,
     storage_dtype: torch.dtype,
     compute_dtype: torch.dtype,
-    skip_modules_pattern: List[str] = _DEFAULT_SKIP_MODULES_PATTERN,
-    skip_modules_classes: List[Type[torch.nn.Module]] = [],
+    skip_modules_pattern: Optional[Tuple[str]] = _DEFAULT_SKIP_MODULES_PATTERN,
+    skip_modules_classes: Optional[Tuple[Type[torch.nn.Module]]] = None,
     non_blocking: bool = False,
-) -> torch.nn.Module:
+    _prefix: str = "",
+) -> None:
     r"""
     Applies layerwise upcasting to a given module. The module expected here is a Diffusers ModelMixin but it can be any
     nn.Module using diffusers layers or pytorch primitives.
@@ -82,30 +83,45 @@ def apply_layerwise_upcasting(
             The dtype to cast the module to before/after the forward pass for storage.
         compute_dtype (`torch.dtype`):
             The dtype to cast the module to during the forward pass for computation.
-        skip_modules_pattern (`List[str]`, defaults to `["pos_embed", "patch_embed", "norm"]`):
+        skip_modules_pattern (`Tuple[str]`, defaults to `("pos_embed", "patch_embed", "norm")`):
             A list of patterns to match the names of the modules to skip during the layerwise upcasting process.
-        skip_modules_classes (`List[Type[torch.nn.Module]]`, defaults to `[]`):
+        skip_modules_classes (`Tuple[Type[torch.nn.Module]]`, defaults to `None`):
             A list of module classes to skip during the layerwise upcasting process.
         non_blocking (`bool`, defaults to `False`):
             If `True`, the weight casting operations are non-blocking.
     """
-    for name, submodule in module.named_modules():
-        if (
-            any(re.search(pattern, name) for pattern in skip_modules_pattern)
-            or any(isinstance(submodule, module_class) for module_class in skip_modules_classes)
-            or not isinstance(submodule, tuple(_SUPPORTED_PYTORCH_LAYERS))
-            or len(list(submodule.children())) > 0
-        ):
-            logger.debug(f'Skipping layerwise upcasting for layer "{name}"')
-            continue
-        logger.debug(f'Applying layerwise upcasting to layer "{name}"')
-        apply_layerwise_upcasting_hook(submodule, storage_dtype, compute_dtype, non_blocking)
-    return module
+    if skip_modules_classes is None and skip_modules_pattern is None:
+        apply_layerwise_upcasting_hook(module, storage_dtype, compute_dtype, non_blocking)
+        return
+
+    should_skip = (skip_modules_classes is not None and isinstance(module, skip_modules_classes)) or (
+        skip_modules_pattern is not None and any(re.search(pattern, _prefix) for pattern in skip_modules_pattern)
+    )
+    if should_skip:
+        logger.debug(f'Skipping layerwise upcasting for layer "{_prefix}"')
+        return
+
+    if isinstance(module, _SUPPORTED_PYTORCH_LAYERS):
+        logger.debug(f'Applying layerwise upcasting to layer "{_prefix}"')
+        apply_layerwise_upcasting_hook(module, storage_dtype, compute_dtype, non_blocking)
+        return
+
+    for name, submodule in module.named_children():
+        layer_name = f"{_prefix}.{name}" if _prefix else name
+        apply_layerwise_upcasting(
+            submodule,
+            storage_dtype,
+            compute_dtype,
+            skip_modules_pattern,
+            skip_modules_classes,
+            non_blocking,
+            _prefix=layer_name,
+        )
 
 
 def apply_layerwise_upcasting_hook(
     module: torch.nn.Module, storage_dtype: torch.dtype, compute_dtype: torch.dtype, non_blocking: bool
-) -> torch.nn.Module:
+) -> None:
     r"""
     Applies a `LayerwiseUpcastingHook` to a given module.
 
@@ -118,10 +134,6 @@ def apply_layerwise_upcasting_hook(
             The dtype to cast the module to during the forward pass.
         non_blocking (`bool`):
             If `True`, the weight casting operations are non-blocking.
-
-    Returns:
-        `torch.nn.Module`:
-            The same module, with the hook attached (the module is modified in place).
     """
     registry = HookRegistry.check_if_exists_or_initialize(module)
     hook = LayerwiseUpcastingHook(storage_dtype, compute_dtype, non_blocking)
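
Note: after this commit, `apply_layerwise_upcasting` recurses over `named_children()`, attaches the hooks in place, and returns `None` instead of the module. A minimal usage sketch, assuming PyTorch with `torch.float8_e4m3fn` support (>= 2.1) and that the function is imported from `diffusers.hooks.layerwise_upcasting` at this revision; `TinyBlock` is a hypothetical toy module, not part of diffusers:

import torch
from diffusers.hooks.layerwise_upcasting import apply_layerwise_upcasting


class TinyBlock(torch.nn.Module):
    # Hypothetical toy module used only for illustration.
    def __init__(self):
        super().__init__()
        self.proj_in = torch.nn.Linear(16, 32)
        self.norm_proj = torch.nn.Linear(32, 32)  # name matches the default "norm" skip pattern
        self.proj_out = torch.nn.Linear(32, 16)


block = TinyBlock()

# Modifies `block` in place: proj_in/proj_out get upcasting hooks, norm_proj is skipped by name.
apply_layerwise_upcasting(
    block,
    storage_dtype=torch.float8_e4m3fn,
    compute_dtype=torch.bfloat16,
)

for name, param in block.named_parameters():
    # Hooked Linear layers are expected to keep their weights in float8 between forward passes.
    print(name, param.dtype)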

src/diffusers/models/modeling_utils.py

Lines changed: 4 additions & 5 deletions
@@ -376,18 +376,17 @@ def enable_layerwise_upcasting(
             skip_modules_pattern.extend(self._keep_in_fp32_modules)
         if self._always_upcast_modules is not None:
             skip_modules_pattern.extend(self._always_upcast_modules)
-        skip_modules_pattern = list(set(skip_modules_pattern))
+        skip_modules_pattern = tuple(set(skip_modules_pattern))
 
         if skip_modules_classes is None:
-            skip_modules_classes = []
+            skip_modules_classes = ()
         if is_peft_available():
             # By default, we want to skip all peft layers because they have a very low memory footprint.
             # If users want to apply layerwise upcasting on peft layers as well, they can utilize the
             # `~diffusers.hooks.layerwise_upcasting.apply_layerwise_upcasting` function which provides
             # them with more flexibility and control.
-            from peft.tuners.tuners_utils import BaseTunerLayer
-
-            skip_modules_classes.append(BaseTunerLayer)
+            if "lora" not in skip_modules_pattern:
+                skip_modules_pattern += ("lora",)
 
         if compute_dtype is None:
             logger.info("`compute_dtype` not provided when enabling layerwise upcasting. Using dtype of the model.")
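
Note: the peft-specific class skip is replaced by a name-based skip: instead of importing `BaseTunerLayer`, a `"lora"` pattern is appended so that the `re.search(pattern, _prefix)` check in `apply_layerwise_upcasting` skips LoRA submodules by their qualified names (peft registers them under names such as `lora_A` / `lora_B`). A small sketch of that check, using illustrative module names that are not taken from any specific model:

import re

# Default skip patterns plus the "lora" entry appended by enable_layerwise_upcasting.
skip_modules_pattern = ("pos_embed", "patch_embed", "norm", "lora")

# Illustrative qualified names, as built by the recursive `_prefix` bookkeeping.
names = [
    "transformer_blocks.0.attn.to_q",
    "transformer_blocks.0.attn.to_q.lora_A.default",
    "transformer_blocks.0.norm1",
]

for name in names:
    skipped = any(re.search(pattern, name) for pattern in skip_modules_pattern)
    print(name, "-> skipped" if skipped else "-> upcasting hook candidate")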
