Skip to content

Commit c83c04c

Browse files
author
Vincent Moens
committed
[BugFix] Fix PEnv device copies
ghstack-source-id: df39fd2 Pull Request resolved: #2840 (cherry picked from commit 6e40548)
1 parent edc284f commit c83c04c

File tree

2 files changed

+47
-35
lines changed

2 files changed

+47
-35
lines changed

test/test_env.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1599,6 +1599,34 @@ def test_parallel_env_device(
15991599
env_serial.close()
16001600
env0.close()
16011601

1602+
@pytest.mark.skipif(not _has_gym, reason="no gym")
@pytest.mark.parametrize("env_device", [None, "cpu"])
def test_parallel_env_device_vs_no_device(self, maybe_fork_ParallelEnv, env_device):
    """Rollouts must be identical whether ParallelEnv's device is None or "cpu".

    Regression test for device-copy handling in ParallelEnv: with identical
    seeds, a rollout from a device-less ParallelEnv and one with an explicit
    "cpu" device should produce the same tensordict.
    """

    def make_env() -> GymEnv:
        # Worker env placed on `env_device` (None or "cpu"); DoubleToFloat
        # casts the float64 Gym observations down to float32.
        env = GymEnv(PENDULUM_VERSIONED(), device=env_device)
        return env.append_transform(DoubleToFloat())

    # Reference rollout: ParallelEnv with no explicit device.
    parallel_env = maybe_fork_ParallelEnv(
        num_workers=1, create_env_fn=make_env, device=None
    )
    parallel_env.reset()
    parallel_env.set_seed(0)
    torch.manual_seed(0)

    parallel_rollout = parallel_env.rollout(max_steps=10)

    # Same rollout with an explicit "cpu" device — the code path that used
    # to diverge before the device-copy fix.
    parallel_env = maybe_fork_ParallelEnv(
        num_workers=1, create_env_fn=make_env, device="cpu"
    )
    parallel_env.reset()
    parallel_env.set_seed(0)
    torch.manual_seed(0)

    parallel_rollout_cpu = parallel_env.rollout(max_steps=10)
    # With identical seeding, both rollouts must match element-wise.
    assert_allclose_td(parallel_rollout, parallel_rollout_cpu)
1629+
16021630
@pytest.mark.skipif(not _has_gym, reason="no gym")
16031631
@pytest.mark.flaky(reruns=3, reruns_delay=1)
16041632
@pytest.mark.parametrize(

torchrl/envs/batched_envs.py

Lines changed: 19 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -374,6 +374,14 @@ def __init__(
374374

375375
is_spec_locked = EnvBase.is_spec_locked
376376

377+
def select_and_clone(self, name, tensor, selected_keys=None):
    """Return a fresh copy of ``tensor`` when ``name`` is a selected key.

    When ``selected_keys`` is not provided, ``self._selected_step_keys`` is
    used.  If the env has a device and the tensor lives on a different one,
    the copy is produced by a (possibly non-blocking) device transfer;
    otherwise a plain clone is returned.  Non-selected names yield ``None``,
    which ``named_apply(..., filter_empty=True)`` drops from the output.
    """
    keys = self._selected_step_keys if selected_keys is None else selected_keys
    if name not in keys:
        # Implicit drop: named_apply filters out None leaves.
        return None
    target = self.device
    if target is not None and tensor.device != target:
        return tensor.to(target, non_blocking=self.non_blocking)
    return tensor.clone()
384+
377385
@property
378386
def non_blocking(self):
379387
nb = self._non_blocking
@@ -1062,12 +1070,10 @@ def _reset(self, tensordict: TensorDictBase, **kwargs) -> TensorDictBase:
10621070
selected_output_keys = self._selected_reset_keys_filt
10631071

10641072
# select + clone creates 2 tds, but we can create one only
1065-
def select_and_clone(name, tensor):
1066-
if name in selected_output_keys:
1067-
return tensor.clone()
1068-
10691073
out = self.shared_tensordict_parent.named_apply(
1070-
select_and_clone,
1074+
lambda *args: self.select_and_clone(
1075+
*args, selected_keys=selected_output_keys
1076+
),
10711077
nested_keys=True,
10721078
filter_empty=True,
10731079
)
@@ -1135,14 +1141,14 @@ def _step(
11351141
# will be modified in-place at further steps
11361142
device = self.device
11371143

1138-
def select_and_clone(name, tensor):
1139-
if name in self._selected_step_keys:
1140-
return tensor.clone()
1144+
selected_keys = self._selected_step_keys
11411145

11421146
if partial_steps is not None:
11431147
next_td = TensorDict.lazy_stack([next_td[i] for i in workers_range])
11441148
out = next_td.named_apply(
1145-
select_and_clone, nested_keys=True, filter_empty=True
1149+
lambda *args: self.select_and_clone(*args, selected_keys),
1150+
nested_keys=True,
1151+
filter_empty=True,
11461152
)
11471153
if out_tds is not None:
11481154
out.update(
@@ -1841,20 +1847,8 @@ def _step(self, tensordict: TensorDictBase) -> TensorDictBase:
18411847
next_td = shared_tensordict_parent.get("next")
18421848
device = self.device
18431849

1844-
if next_td.device != device and device is not None:
1845-
1846-
def select_and_clone(name, tensor):
1847-
if name in self._selected_step_keys:
1848-
return tensor.to(device, non_blocking=self.non_blocking)
1849-
1850-
else:
1851-
1852-
def select_and_clone(name, tensor):
1853-
if name in self._selected_step_keys:
1854-
return tensor.clone()
1855-
18561850
out = next_td.named_apply(
1857-
select_and_clone,
1851+
self.select_and_clone,
18581852
nested_keys=True,
18591853
filter_empty=True,
18601854
device=device,
@@ -2005,20 +1999,10 @@ def tentative_update(val, other):
20051999
selected_output_keys = self._selected_reset_keys_filt
20062000
device = self.device
20072001

2008-
if self.shared_tensordict_parent.device != device and device is not None:
2009-
2010-
def select_and_clone(name, tensor):
2011-
if name in selected_output_keys:
2012-
return tensor.to(device, non_blocking=self.non_blocking)
2013-
2014-
else:
2015-
2016-
def select_and_clone(name, tensor):
2017-
if name in selected_output_keys:
2018-
return tensor.clone()
2019-
20202002
out = self.shared_tensordict_parent.named_apply(
2021-
select_and_clone,
2003+
lambda *args: self.select_and_clone(
2004+
*args, selected_keys=selected_output_keys
2005+
),
20222006
nested_keys=True,
20232007
filter_empty=True,
20242008
device=device,

0 commit comments

Comments
 (0)