Commit 4949912

[Utils] add align_modules (#282)
* add align_modules
* better implementation
* add align_modules
* add docstrings
* remove comment
* docstring and typo

Signed-off-by: Kyle Sayers <kylesayrs@gmail.com>
1 parent 60d78a5 commit 4949912

File tree

2 files changed: 70 additions, 1 deletion

2 files changed

+70
-1
lines changed

src/compressed_tensors/utils/offload.py

Lines changed: 40 additions & 1 deletion
@@ -28,7 +28,7 @@
 import contextlib
 import warnings
 from functools import wraps
-from typing import Any, Callable, Dict, Literal, Optional, Union
+from typing import Any, Callable, Dict, Iterable, Literal, Optional, Union

 import torch

@@ -67,6 +67,8 @@
     "delete_offload_parameter",
     "has_offloaded_params",
     "disable_hf_hook",
+    "disable_offload",
+    "align_modules",
     "align_module_device",
 ]

@@ -344,6 +346,43 @@ def delete_from_weights_map(
     )


+@contextlib.contextmanager
+def disable_offload(module: torch.nn.Module):
+    """
+    Context manager to disable module onloading and offloading. Parameters will stay on
+    their current device
+
+    :param module: module to disable offloading for
+    """
+    if has_offloaded_params(module):
+        module._hf_hook.offload = False
+        yield
+        module._hf_hook.offload = True
+    else:
+        yield
+
+
+@contextlib.contextmanager
+def align_modules(
+    modules: Union[torch.nn.Module, Iterable[torch.nn.Module]],
+    execution_device: Optional[torch.device] = None,
+):
+    """
+    Context manager for onloading modules to a device, and disabling onload and offload
+    attempts triggered by forward calls. Used for sequential onloading of layers
+
+    :param modules: `torch.nn.Module` or iterable of `torch.nn.Module`s to onload
+    :param execution_device: device to onload to
+    """
+    modules = (modules,) if isinstance(modules, torch.nn.Module) else modules
+
+    with contextlib.ExitStack() as stack:
+        for module in modules:
+            stack.enter_context(align_module_device(module, execution_device))
+            stack.enter_context(disable_offload(module))  # disable redundant onloading
+        yield
+
+
 """ Upstreamed Functions """
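Taken together, align_modules composes align_module_device (which onloads each module's parameters from the accelerate hook's weights map) with disable_offload (which clears the hook's offload flag), so a group of modules stays resident across repeated forward calls instead of being onloaded and offloaded once per call. Below is a minimal runnable sketch of the intended usage, assuming accelerate is installed; the module names and sizes are illustrative, not part of this commit:

import torch
from accelerate.hooks import attach_align_device_hook
from compressed_tensors.utils import align_modules

m0, m1 = torch.nn.Linear(4, 4), torch.nn.Linear(4, 4)
model = torch.nn.Sequential(m0, m1)

# Offload all weights: parameters are replaced with meta tensors, and the
# real values are kept in the hook's weights map for per-forward onloading
attach_align_device_hook(
    model,
    execution_device=torch.device("cpu"),
    offload=True,
    weights_map=model.state_dict(),
)

x = torch.randn(1, 4)
with align_modules((m0, m1), execution_device=torch.device("cpu")):
    # Both modules are onloaded once and stay resident for every call;
    # the hook's redundant per-forward onload/offload is suppressed
    for _ in range(3):
        x = model(x)
# On exit, parameters are offloaded back to the meta device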

tests/test_utils/test_offload.py

Lines changed: 30 additions & 0 deletions
@@ -15,6 +15,7 @@
 import torch
 from compressed_tensors.utils import (
     align_module_device,
+    align_modules,
     delete_offload_parameter,
     disable_hf_hook,
     get_execution_device,
@@ -248,6 +249,35 @@ def test_disable_hf_hook_model_recurse():
     assert hasattr(module2, "_hf_hook")


+@requires_accelerate()
+def test_align_modules():
+    from accelerate.hooks import attach_align_device_hook
+
+    module0 = ExampleModule()
+    module1 = ExampleModule()
+    module2 = ExampleModule()
+    model = torch.nn.Sequential(module0, torch.nn.Sequential(module1, module2))
+    attach_align_device_hook(
+        model,
+        execution_device=torch.device("cpu"),
+        offload=True,
+        weights_map=model.state_dict(),
+    )
+
+    assert module0.a.device == torch.device("meta")
+    assert module1.a.device == torch.device("meta")
+    assert module2.a.device == torch.device("meta")
+
+    with align_modules((module0, module1)):
+        assert module0.a.device != torch.device("meta")
+        assert module1.a.device != torch.device("meta")
+        assert module2.a.device == torch.device("meta")
+
+    assert module0.a.device == torch.device("meta")
+    assert module1.a.device == torch.device("meta")
+    assert module2.a.device == torch.device("meta")
+
+
 @requires_accelerate()
 def test_offload_to_weights_map():
     from accelerate.utils import OffloadedWeightsLoader, PrefixedDataset
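For reference, ExampleModule is defined earlier in tests/test_utils/test_offload.py and is not shown in this diff. A hypothetical stand-in consistent with the assertions above (a module exposing a parameter named `a`) could look like:

import torch

class ExampleModule(torch.nn.Module):
    # Hypothetical stand-in: the real definition lives earlier in the
    # test file and is not part of this diff
    def __init__(self):
        super().__init__()
        self.a = torch.nn.Parameter(torch.zeros(4))

    def forward(self, x):
        return x + self.a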
