
Commit 54f5b4e

rename, simplify (#354)

Authored by Kyle Sayers
Signed-off-by: Kyle Sayers <kylesayrs@gmail.com>

1 parent 52e7074 · commit 54f5b4e

File tree: 4 files changed (+67, -60 lines)

src/compressed_tensors/utils/offload.py

Lines changed: 49 additions & 48 deletions
@@ -14,27 +14,29 @@
 """
 Utilities associated with offloading functionality provided by `accelerate`.

-| ----------------------------------------------------------------------------------------------------- | # noqa: E501
-| Operation | Without offloading support             | With offloading support                          | # noqa: E501
-| --------- | -------------------------------------- | ------------------------------------------------ | # noqa: E501
-| Add       | module.register_parameter(name, param) | register_offload_parameter(module, name, param)  | # noqa: E501
-| Check     | N/A                                    | has_offloaded_params(module)                     | # noqa: E501
-| Onload    | N/A                                    | with align_module_device(module)                 | # noqa: E501
-| Update    | module.name.data.copy_(new_data)       | update_offload_parameter(module, name, new_data) | # noqa: E501
-| Delete    | del module.name                        | delete_offload_parameter(module, name)           | # noqa: E501
-| ----------------------------------------------------------------------------------------------------- | # noqa: E501
+| ------------------------------------------------------------------------------------------------------ | # noqa: E501
+| Operation  | Without offloading support             | With offloading support                          | # noqa: E501
+| ---------- | -------------------------------------- | ------------------------------------------------ | # noqa: E501
+| Add        | module.register_parameter(name, param) | register_offload_parameter(module, name, param)  | # noqa: E501
+| Check      | N/A                                    | has_offloaded_params(module)                     | # noqa: E501
+| Onload     | N/A                                    | with align_module_device(module)                 | # noqa: E501
+| Update     | module.name.data.copy_(new_data)       | update_offload_parameter(module, name, new_data) | # noqa: E501
+| Delete     | del module.name                        | delete_offload_parameter(module, name)           | # noqa: E501
+| Add Module | module.register_module(name, child)    | register_offload_module(name, child)             | # noqa: E501
+| Del Module | del module.name                        | delete_offload_module(module, name)              | # noqa: E501
+| ------------------------------------------------------------------------------------------------------ | # noqa: E501
 """

 import contextlib
 import warnings
 from functools import wraps
-from typing import Any, Callable, Dict, Iterable, List, Literal, Optional, Union
+from operator import attrgetter
+from typing import Any, Callable, Dict, Iterable, Literal, Optional, Union

 import torch


 try:
-    from accelerate import dispatch_model
     from accelerate.hooks import (
         AlignDevicesHook,
         add_hook_to_module,
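The docstring table above is the main documentation change: it gains `Add Module` and `Del Module` rows for the `register_offload_module` and `delete_offload_module` helpers. As a rough sketch of how the offloading-aware column is intended to be used (only the helper names and signatures come from the table; the `torch.nn.Linear` setup is illustrative):

    import torch
    from compressed_tensors.utils import (
        align_module_device,
        delete_offload_parameter,
        has_offloaded_params,
        register_offload_parameter,
        update_offload_parameter,
    )

    module = torch.nn.Linear(4, 4)
    scale = torch.nn.Parameter(torch.ones(4))

    # these helpers work whether or not accelerate hooks are attached
    register_offload_parameter(module, "scale", scale)
    update_offload_parameter(module, "scale", torch.full((4,), 2.0))

    if has_offloaded_params(module):
        # temporarily onload the module's parameters for execution
        with align_module_device(module):
            y = module(torch.randn(1, 4))

    delete_offload_parameter(module, "scale")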
@@ -45,10 +47,12 @@
     from accelerate.utils import (
         OffloadedWeightsLoader,
         PrefixedDataset,
+        find_tied_parameters,
         set_module_tensor_to_device,
     )

     _has_accelerate = True
+
 except ImportError:
     _has_accelerate = False
     AlignDevicesHook = None
@@ -58,8 +62,8 @@
     PrefixedDataset = None
     set_module_tensor_to_device = None
     named_module_tensors = None
-    dispatch_model = None
     attach_align_device_hook = None
+    find_tied_parameters = None


 __all__ = [
@@ -78,14 +82,13 @@
     "align_module_device",
     "register_offload_module",
     "delete_offload_module",
-    "force_cpu_offload",
+    "offloaded_dispatch",
 ]


 def check_accelerate(fallback: Any):
     def decorator(func: Callable[[Any], Any]):
         if not _has_accelerate:
-
             if fallback == "error":

                 @wraps(func)
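The `check_accelerate` hunk only removes a stray blank line, and the diff shows just the top of the decorator. For orientation, a minimal sketch of the guard pattern visible in the context lines (the fallback bodies below are an assumption reconstructed from that context, not the file's exact code):

    from functools import wraps
    from typing import Any, Callable

    _has_accelerate = False  # set by the try/except import guard above

    def check_accelerate(fallback: Any):
        def decorator(func: Callable[[Any], Any]):
            if not _has_accelerate:
                if fallback == "error":

                    @wraps(func)
                    def fallback_fn(*args, **kwargs):
                        raise ValueError(
                            f"{func.__name__} requires `accelerate` to be installed"
                        )

                else:

                    @wraps(func)
                    def fallback_fn(*args, **kwargs):
                        return fallback

                return fallback_fn
            return func
        return decorator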
@@ -479,46 +482,44 @@ def delete_offload_module(base: torch.nn.Module, name: str):


 @check_accelerate(fallback="error")
-def force_cpu_offload(
-    module: torch.nn.Module, execution_device: torch.device
+def offloaded_dispatch(
+    module: torch.nn.Module,
+    execution_device: torch.device,
+    offload_device: Union[torch.device, Literal["disk"]] = torch.device("cpu"),
 ) -> torch.nn.Module:
     """
-    Force cpu offloading a module, primarily used for testing
+    Unlike `dispatch_model`, this function forces a module (and its submodules) to
+    offload all parameters and replace them with meta tensors, utilizing the
+    `AlignDevicesHook` to control onloading and offloading.

     :param module: module containing parameters to offload
-    :param execution_device: execution device submodules
-    :return: module with hooks to perform cpu offloading
+    :param execution_device: device that modules will onload and execute on
+    :param offload_device: device that module parameters will offload to
+    :return: module with offloading device hooks
     """
-    # edge case: there is a bug in `dispatch_model` which causes
-    # the function to only work if the model contains submodules
-    if next(module.children(), None) is None:
-        attach_align_device_hook(
-            module,
-            execution_device=execution_device,
-            offload=True,
-            weights_map=module.state_dict(),
-            tied_params_map={},
-        )
-        return module
-
-    device_map = {}
-
-    def collect_device_map(name: List[str], module: torch.nn.Module):
-        if next(module.parameters(recurse=False), None) is not None:
-            device_map[".".join(name)] = "cpu"
-            return
-
-        else:
-            for submodule_name, submodule in module.named_children():
-                name.append(submodule_name)
-                collect_device_map(name, submodule)
-                name.pop()
-
-    collect_device_map([], module)
-
-    return dispatch_model(
-        module, device_map, main_device=execution_device, force_hooks=True
+    if offload_device == "disk":
+        raise NotImplementedError("Disk offloading is not currently supported")
+
+    # create weights map which backs offloaded parameters with CPU tensors
+    weights_map = OffloadedWeightsLoader(state_dict=module.state_dict(), device="cpu")
+
+    # create tied params map, keyed by the data pointer of each tied parameter
+    tied_params = find_tied_parameters(module)
+    tied_params_map = {}
+    for group in tied_params:
+        for param_name in group:
+            data_ptr = attrgetter(param_name)(module).data_ptr()
+            tied_params_map[data_ptr] = {}
+
+    # recursively attach hooks to all submodules
+    attach_align_device_hook(
+        module,
+        execution_device=execution_device,
+        offload=True,
+        weights_map=weights_map,
+        tied_params_map=tied_params_map,
     )
+    return module


 """ Upstreamed Functions """

tests/test_transform/factory/test_correctness.py

Lines changed: 2 additions & 2 deletions
@@ -19,7 +19,7 @@
     TransformFactory,
     TransformScheme,
 )
-from compressed_tensors.utils import align_modules, force_cpu_offload
+from compressed_tensors.utils import offloaded_dispatch
 from tests.testing_utils import requires_accelerate, requires_gpu


@@ -75,7 +75,7 @@ def test_correctness_model(scheme, offload=False):
     # load model
     model = TransformableModel(2, 4, 8, 16, 32, 64)
     if offload:
-        model = force_cpu_offload(model, torch.device("cuda"))
+        model = offloaded_dispatch(model, torch.device("cuda"))

     # create factory
     scheme.apply = [

tests/test_transform/factory/test_memory.py

Lines changed: 2 additions & 2 deletions
@@ -22,7 +22,7 @@
     TransformFactory,
     TransformScheme,
 )
-from compressed_tensors.utils import align_modules, force_cpu_offload
+from compressed_tensors.utils import align_modules, offloaded_dispatch
 from tests.testing_utils import requires_accelerate, requires_gpu


@@ -58,7 +58,7 @@ def test_memory_sharing(scheme, offload=False):
     # load model (maybe with offloading)
     model = TransformableModel(2, 2, 4, 4, 8, 8)
     if offload:
-        force_cpu_offload(model, torch.device("cuda"))
+        offloaded_dispatch(model, torch.device("cuda"))

     # add transforms to model
     factory.apply_to_model(model)

tests/test_utils/test_offload.py

Lines changed: 14 additions & 8 deletions
@@ -19,9 +19,9 @@
     delete_offload_module,
     delete_offload_parameter,
     disable_hf_hook,
-    force_cpu_offload,
     get_execution_device,
     has_offloaded_params,
+    offloaded_dispatch,
     register_offload_module,
     register_offload_parameter,
     update_offload_parameter,
@@ -111,15 +111,15 @@ def test_register_offload_parameter():

     # register a param prior to offloading
     register_offload_parameter(module, "c", parameter)
-    assert hasattr(module, "c") and module.c == parameter
+    assert module.c == parameter

     # offloading, check that added param was offloaded
     attach_align_device_hook(module, offload=True, weights_map=module.state_dict())
     assert "c" in module._hf_hook.weights_map

     # register a param after offloading, check that added param was offloaded
     register_offload_parameter(module, "d", parameter)
-    assert hasattr(module, "d") and module.d.device == torch.device("meta")
+    assert module.d.device == torch.device("meta")
     assert module._hf_hook.weights_map["d"].device == torch.device("cpu")

     # added parameters can be onloaded and offloaded
@@ -358,7 +358,7 @@ def test_register_offload_module(exec_device):
     # with offloading
     model = ExampleModel()
     child = torch.nn.Linear(2, 3)
-    force_cpu_offload(model, exec_device)
+    offloaded_dispatch(model, exec_device)
     register_offload_module(model, "child", child)
     register_offload_module(model.linear, "child", child)
     assert child in model.children()
@@ -386,7 +386,7 @@ def test_delete_offload_module(exec_device):
     # with offloading
     model = ExampleModel()
     child = torch.nn.Linear(2, 3)
-    force_cpu_offload(model, exec_device)
+    offloaded_dispatch(model, exec_device)
     register_offload_module(model, "child", child)
     register_offload_module(model.linear, "child", child)
     delete_offload_module(model, "child")
@@ -398,10 +398,10 @@
 @requires_gpu
 @requires_accelerate()
 @pytest.mark.parametrize("exec_device", [torch.device("cpu"), torch.device("cuda")])
-def test_force_cpu_offload(exec_device):
+def test_offloaded_dispatch(exec_device):
     # single module
     module = torch.nn.Linear(1, 2)
-    module = force_cpu_offload(module, exec_device)
+    module = offloaded_dispatch(module, exec_device)
     assert has_offloaded_params(module)
     assert module._hf_hook.offload
     assert module.weight.device == torch.device("meta")
@@ -413,7 +413,7 @@ def test_force_cpu_offload(exec_device):

     # model
     model = ExampleModel()
-    model = force_cpu_offload(model, exec_device)
+    model = offloaded_dispatch(model, exec_device)
     assert not has_offloaded_params(model)

     assert has_offloaded_params(model.linear)
@@ -424,3 +424,9 @@ def test_force_cpu_offload(exec_device):
     # can run
     model(torch.empty(1, device=exec_device))
+
+    # can add new params
+    parameter = torch.nn.Parameter(torch.tensor(1.0))
+    register_offload_parameter(module, "new_param", parameter)
+    assert module.new_param.device == torch.device("meta")
+    assert module._hf_hook.weights_map["new_param"].device == torch.device("cpu")
