Skip to content

Commit 382430d

Browse files
author
Vincent Moens
committed
[Quality] Better device checks
ghstack-source-id: 7174415 Pull Request resolved: #2909
1 parent 96c3003 commit 382430d

File tree

3 files changed

+80
-31
lines changed

3 files changed

+80
-31
lines changed

test/test_env.py

Lines changed: 26 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1217,47 +1217,52 @@ def test_env_with_batch_size(
12171217
@pytest.mark.skipif(not _has_dmc, reason="no dm_control")
@pytest.mark.parametrize("env_task", ["stand,stand,stand", "stand,walk,stand"])
@pytest.mark.parametrize("share_individual_td", [True, False])
@pytest.mark.parametrize("device", get_default_devices())
def test_multi_task_serial_parallel(
    self, env_task, share_individual_td, maybe_fork_ParallelEnv, device
):
    """Check that SerialEnv and ParallelEnv produce identical rollouts.

    Builds three humanoid DMControl envs (single- or multi-task depending
    on ``env_task``), rolls the serial and the parallel batched env out for
    50 steps with the same seed, and asserts the two rollouts match.
    """
    tasks = env_task.split(",")
    if len(tasks) == 1:
        single_task = True

        def env_make():
            return DMControlEnv("humanoid", tasks[0], device=device)

    elif len(set(tasks)) == 1 and len(tasks) == 3:
        single_task = True
        env_make = [lambda: DMControlEnv("humanoid", tasks[0], device=device)] * 3
    else:
        single_task = False
        # Bind `task` as a default argument to avoid the late-binding
        # closure pitfall (all lambdas would otherwise see the last task).
        env_make = [
            lambda task=task: DMControlEnv("humanoid", task, device=device)
            for task in tasks
        ]

    env_serial = SerialEnv(3, env_make, share_individual_td=share_individual_td)
    try:
        env_serial.start()
        assert env_serial._single_task is single_task

        env_serial.set_seed(0)
        torch.manual_seed(0)
        td_serial = env_serial.rollout(max_steps=50)
    finally:
        env_serial.close(raise_if_closed=False)

    # Construct BEFORE entering the try block: if the constructor raises,
    # the finally clause would otherwise fail with a NameError on
    # `env_parallel` and mask the original exception. This also mirrors
    # how `env_serial` is handled above.
    env_parallel = maybe_fork_ParallelEnv(
        3, env_make, share_individual_td=share_individual_td
    )
    try:
        env_parallel.start()
        assert env_parallel._single_task is single_task

        env_parallel.set_seed(0)
        torch.manual_seed(0)
        td_parallel = env_parallel.rollout(max_steps=50)

        assert_allclose_td(td_serial, td_parallel)
    finally:
        env_parallel.close(raise_if_closed=False)

12621267
@pytest.mark.skipif(not _has_dmc, reason="no dm_control")
12631268
def test_multitask(self, maybe_fork_ParallelEnv):

test/test_libs.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
# LICENSE file in the root directory of this source tree.
55
from __future__ import annotations
66

7+
import collections
78
import functools
89
import gc
910
import importlib.util
@@ -1762,6 +1763,43 @@ def test_dmcontrol(self, env_name, task, frame_skip, from_pixels, pixels_only):
17621763
assert final_seed0 == final_seed2
17631764
assert_allclose_td(rollout0, rollout2)
17641765

1766+
@pytest.mark.skipif(not torch.cuda.is_available(), reason="requires cuda")
@pytest.mark.parametrize("env_name,task", [["cheetah", "run"]])
@pytest.mark.parametrize("frame_skip", [1, 3])
@pytest.mark.parametrize(
    "from_pixels,pixels_only", [[True, True], [True, False], [False, False]]
)
def test_dmcontrol_device_consistency(
    self, env_name, task, frame_skip, from_pixels, pixels_only
):
    """A CPU and a CUDA DMControlEnv must produce identical trajectories.

    The CPU env is rolled out first; its action sequence is then replayed
    step by step on the CUDA env, and the two rollouts are compared after
    moving the CUDA one back to CPU.
    """
    shared_kwargs = dict(
        frame_skip=frame_skip,
        from_pixels=from_pixels,
        pixels_only=pixels_only,
    )
    env_cpu = DMControlEnv(env_name, task, device="cpu", **shared_kwargs)
    env_cuda = DMControlEnv(env_name, task, device="cuda", **shared_kwargs)

    env_cpu.set_seed(0)
    rollout_cpu = env_cpu.rollout(100, break_when_any_done=False)
    assert rollout_cpu.device == torch.device("cpu")

    # Queue up the exact CPU actions so the CUDA env replays them in order.
    pending_actions = collections.deque(rollout_cpu["action"].unbind(0))

    def replay_policy(td):
        return td.set("action", pending_actions.popleft())

    env_cuda.set_seed(0)
    rollout_cuda = env_cuda.rollout(100, replay_policy, break_when_any_done=False)
    assert rollout_cuda.device == torch.device("cuda:0")
    assert_allclose_td(rollout_cpu, rollout_cuda.cpu())
17651803
@pytest.mark.parametrize("env_name,task", [["cheetah", "run"]])
17661804
@pytest.mark.parametrize("frame_skip", [1, 3])
17671805
@pytest.mark.parametrize(

torchrl/modules/tensordict_module/exploration.py

Lines changed: 16 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -159,23 +159,29 @@ def forward(self, tensordict: TensorDictBase) -> TensorDictBase:
159159
action_tensordict = tensordict
160160
action_key = self.action_key
161161

162-
out = action_tensordict.get(action_key)
162+
action = action_tensordict.get(action_key)
163163
eps = self.eps
164-
cond = torch.rand(action_tensordict.shape, device=out.device) < eps
164+
device = eps.device
165+
action_device = action.device
166+
if action_device is not None and action_device != device:
167+
raise RuntimeError(
168+
f"Expected action and e-greedy module to be on the same device, but got {action.device=} and e-greedy device={device}."
169+
)
170+
cond = torch.rand(action_tensordict.shape, device=device) < eps
165171
# cond = torch.zeros(action_tensordict.shape, device=out.device, dtype=torch.bool).bernoulli_(eps)
166-
cond = expand_as_right(cond, out)
172+
cond = expand_as_right(cond, action)
167173
spec = self.spec
168174
if spec is not None:
169175
if isinstance(spec, Composite):
170176
spec = spec[self.action_key]
171-
if spec.shape != out.shape:
177+
if spec.shape != action.shape:
172178
# In batched envs if the spec is passed unbatched, the rand() will not
173179
# cover all batched dims
174180
if (
175181
not len(spec.shape)
176-
or out.shape[-len(spec.shape) :] == spec.shape
182+
or action.shape[-len(spec.shape) :] == spec.shape
177183
):
178-
spec = spec.expand(out.shape)
184+
spec = spec.expand(action.shape)
179185
else:
180186
raise ValueError(
181187
"Action spec shape does not match the action shape"
@@ -188,12 +194,12 @@ def forward(self, tensordict: TensorDictBase) -> TensorDictBase:
188194
)
189195
spec.update_mask(action_mask)
190196
r = spec.rand()
191-
if r.device != out.device:
192-
r = r.to(out.device)
193-
out = torch.where(cond, r, out)
197+
if r.device != device:
198+
r = r.to(device)
199+
action = torch.where(cond, r, action)
194200
else:
195201
raise RuntimeError("spec must be provided to the exploration wrapper.")
196-
action_tensordict.set(action_key, out)
202+
action_tensordict.set(action_key, action)
197203
return tensordict
198204

199205

0 commit comments

Comments (0)