File tree Expand file tree Collapse file tree 2 files changed +31
-10
lines changed
src/compressed_tensors/utils Expand file tree Collapse file tree 2 files changed +31
-10
lines changed Original file line number Diff line number Diff line change @@ -171,22 +171,24 @@ def update_parameter_data(
171
171
172
172
def get_execution_device (module : torch .nn .Module ) -> torch .device :
173
173
"""
174
- Get the device which inputs should be moved to before module execution
174
+ Get the device which inputs should be moved to before module execution.
175
+ Assume that modules execute in the same order as returned by `model.modules()`
175
176
176
177
:param module: module to check, may be offloaded
177
178
:return: onload device of module
178
179
"""
179
- if has_offloaded_params (module ):
180
- return module ._hf_hook .execution_device
180
+ for module in module .modules ():
181
+ if has_offloaded_params (module ):
182
+ return module ._hf_hook .execution_device
181
183
182
- first_param = next (module .parameters (), None )
183
- if first_param is None :
184
- warnings .warn (
185
- f"Unable able to infer execution device of { module } , falling back to CPU"
186
- )
187
- return torch .device ("cpu" )
184
+ param = next (module .parameters (recurse = False ), None )
185
+ if param is not None :
186
+ return param .device
188
187
189
- return first_param .device
188
+ warnings .warn (
189
+ f"Unable to get execution device of { module } , falling back to CPU"
190
+ )
191
+ return torch .device ("cpu" )
190
192
191
193
192
194
def register_offload_parameter (
Original file line number Diff line number Diff line change @@ -102,6 +102,25 @@ def test_get_execution_device():
102
102
assert get_execution_device (module ) == torch .device ("cuda:0" )
103
103
104
104
105
+ @requires_gpu
106
+ @requires_accelerate ()
107
+ def test_get_execution_device_model ():
108
+ class Model (torch .nn .Module ):
109
+ def __init__ (self ):
110
+ super ().__init__ ()
111
+ self .a = torch .nn .Linear (1 , 2 )
112
+ self .b = torch .nn .Linear (2 , 2 , device = "cuda:0" )
113
+
114
+ def forward (self , x ):
115
+ return self .b (self .a (x ).to ("cuda:0" ))
116
+
117
+ model = Model ()
118
+ assert get_execution_device (model ) == torch .device ("cpu" )
119
+
120
+ offloaded_dispatch (model .a , torch .device ("cuda:0" ))
121
+ assert get_execution_device (model ) == torch .device ("cuda:0" )
122
+
123
+
105
124
@requires_accelerate ()
106
125
def test_register_offload_parameter ():
107
126
from accelerate import init_empty_weights
You can’t perform that action at this time.
0 commit comments