from compressed_tensors.utils import (
    align_module_device,
    align_modules,
+   delete_offload_module,
    delete_offload_parameter,
    disable_hf_hook,
    force_cpu_offload,
@@ -344,9 +345,8 @@ def test_offload_to_weights_map():

@requires_gpu
@requires_accelerate()
-def test_register_offload_module():
-    execution_device = torch.device("cuda")
-
+@pytest.mark.parametrize("exec_device", [torch.device("cpu"), torch.device("cuda")])
+def test_register_offload_module(exec_device):
    # no offloading
    model = ExampleModel()
    child = torch.nn.Linear(2, 3)
@@ -358,37 +358,62 @@ def test_register_offload_module():
    # with offloading
    model = ExampleModel()
    child = torch.nn.Linear(2, 3)
-    force_cpu_offload(model, execution_device)
+    force_cpu_offload(model, exec_device)
    register_offload_module(model, "child", child)
    register_offload_module(model.linear, "child", child)
    assert child in model.children()
    assert child in model.linear.children()

    # can run modules
    model(torch.empty(1))
-    child(torch.empty(2, device=execution_device))
+    child(torch.empty(2, device=exec_device))


@requires_gpu
@requires_accelerate()
-def test_force_cpu_offload():
-    execution_device = torch.device("cuda")
+@pytest.mark.parametrize("exec_device", [torch.device("cpu"), torch.device("cuda")])
+def test_delete_offload_module(exec_device):
+    # no offloading
+    model = ExampleModel()
+    child = torch.nn.Linear(2, 3)
+    register_offload_module(model, "child", child)
+    register_offload_module(model.linear, "child", child)
+    delete_offload_module(model, "child")
+    delete_offload_module(model.linear, "child")
+    assert child not in model.children()
+    assert child not in model.linear.children()

+    # with offloading
+    model = ExampleModel()
+    child = torch.nn.Linear(2, 3)
+    force_cpu_offload(model, exec_device)
+    register_offload_module(model, "child", child)
+    register_offload_module(model.linear, "child", child)
+    delete_offload_module(model, "child")
+    delete_offload_module(model.linear, "child")
+    assert child not in model.children()
+    assert child not in model.linear.children()
+
+
+@requires_gpu
+@requires_accelerate()
+@pytest.mark.parametrize("exec_device", [torch.device("cpu"), torch.device("cuda")])
+def test_force_cpu_offload(exec_device):
    # single module
    module = torch.nn.Linear(1, 2)
-    module = force_cpu_offload(module, execution_device)
+    module = force_cpu_offload(module, exec_device)
    assert has_offloaded_params(module)
    assert module._hf_hook.offload
    assert module.weight.device == torch.device("meta")
    assert "weight" in module._hf_hook.weights_map
    assert module._hf_hook.tied_params_map is not None

    # can run
-    module(torch.empty(1, device=execution_device))
+    module(torch.empty(1, device=exec_device))

    # model
    model = ExampleModel()
-    model = force_cpu_offload(model, execution_device)
+    model = force_cpu_offload(model, exec_device)
    assert not has_offloaded_params(model)

    assert has_offloaded_params(model.linear)
@@ -398,4 +423,4 @@ def test_force_cpu_offload():
    assert model.linear._hf_hook.tied_params_map is not None

    # can run
-    model(torch.empty(1, device=execution_device))
+    model(torch.empty(1, device=exec_device))
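
For quick reference, a minimal standalone sketch of the register/delete round-trip these tests exercise (a hypothetical usage example, not part of the diff). The helper signatures follow their uses above; the `ExampleModel` body is an assumption, since the diff only shows that the fixture exposes a `linear` submodule and accepts a 1-element input.

import torch
from compressed_tensors.utils import (
    delete_offload_module,
    force_cpu_offload,
    register_offload_module,
)


class ExampleModel(torch.nn.Module):
    # assumed stand-in for the test fixture; the real definition is not in this diff
    def __init__(self):
        super().__init__()
        self.linear = torch.nn.Linear(1, 2)

    def forward(self, x):
        return self.linear(x)


# offload the parent's parameters; a CPU execution device mirrors the new
# parametrization above, so this runs without a GPU
model = force_cpu_offload(ExampleModel(), torch.device("cpu"))
child = torch.nn.Linear(2, 3)

# attach the child so it is managed alongside the parent's offloading hooks
register_offload_module(model, "child", child)
assert child in model.children()

# detach it again; the child should no longer appear among the children
delete_offload_module(model, "child")
assert child not in model.children()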