
Commit 6bebea7

[Feature] Vmas to device (#850)
1 parent 20b6fc9

2 files changed: +77 -6 lines

test/test_libs.py

Lines changed: 62 additions & 2 deletions
@@ -8,15 +8,14 @@
 import numpy as np
 import pytest
 import torch
-
 from _utils_internal import (
     get_available_devices,
     HALFCHEETAH_VERSIONED,
     PENDULUM_VERSIONED,
     PONG_VERSIONED,
 )
 from packaging import version
-from tensordict.tensordict import assert_allclose_td
+from tensordict.tensordict import assert_allclose_td, TensorDict
 from torchrl._utils import implement_for
 from torchrl.collectors import MultiaSyncDataCollector
 from torchrl.collectors.collectors import RandomPolicy
@@ -685,6 +684,67 @@ def make_vmas():
             [n_workers, list(env.n_agents)[0], list(env.num_envs)[0], n_rollout_samples]
         )
 
+    @pytest.mark.parametrize("num_envs", [1, 10])
+    @pytest.mark.parametrize("n_workers", [1, 3])
+    def test_vmas_reset(
+        self,
+        scenario_name,
+        num_envs,
+        n_workers,
+        n_agents=5,
+        n_rollout_samples=3,
+        max_steps=3,
+    ):
+        def make_vmas():
+            env = VmasEnv(
+                scenario_name=scenario_name,
+                num_envs=num_envs,
+                n_agents=n_agents,
+                max_steps=max_steps,
+            )
+            env.set_seed(0)
+            return env
+
+        env = ParallelEnv(n_workers, make_vmas)
+        tensordict = env.rollout(max_steps=n_rollout_samples)
+
+        assert tensordict["done"].squeeze(-1)[..., -1].all()
+
+        _reset = torch.randint(low=0, high=2, size=env.batch_size, dtype=torch.bool)
+        while not _reset.any():
+            _reset = torch.randint(low=0, high=2, size=env.batch_size, dtype=torch.bool)
+
+        tensordict = env.reset(
+            TensorDict({"_reset": _reset}, batch_size=env.batch_size, device=env.device)
+        )
+        assert tensordict["done"][_reset].all().item() is False
+        # vmas resets the whole agent dimension if even one agent needs resetting,
+        # so here we check that where no agent was reset, all agents are still done
+        assert tensordict["done"].all(dim=1)[~_reset.any(dim=1)].all().item() is True
+
+    @pytest.mark.skipif(len(get_available_devices()) < 2, reason="not enough devices")
+    @pytest.mark.parametrize("first", [0, 1])
+    def test_to_device(self, scenario_name: str, first: int):
+        devices = get_available_devices()
+
+        def make_vmas():
+            env = VmasEnv(
+                scenario_name=scenario_name,
+                num_envs=7,
+                n_agents=3,
+                seed=0,
+                device=devices[first],
+            )
+            return env
+
+        env = ParallelEnv(3, make_vmas)
+
+        assert env.rollout(max_steps=3).device == devices[first]
+
+        env.to(devices[1 - first])
+
+        assert env.rollout(max_steps=3).device == devices[1 - first]
 
 if __name__ == "__main__":
     args, unknown = argparse.ArgumentParser().parse_known_args()
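For reference, a minimal sketch of the partial-reset path that test_vmas_reset exercises, run against a bare VmasEnv rather than a ParallelEnv. The scenario name and sizes are illustrative assumptions, not part of this commit:

import torch

from tensordict.tensordict import TensorDict
from torchrl.envs.libs.vmas import VmasEnv

# Illustrative scenario name and sizes, chosen only for this sketch.
env = VmasEnv(scenario_name="balance", num_envs=4, n_agents=3, max_steps=5)
env.set_seed(0)
env.rollout(max_steps=5)

# Build a boolean "_reset" mask over the env batch and reset only the masked entries.
_reset = torch.zeros(env.batch_size, dtype=torch.bool, device=env.device)
_reset[..., 0] = True  # reset the first vectorized sub-environment for every agent
td = env.reset(
    TensorDict({"_reset": _reset}, batch_size=env.batch_size, device=env.device)
)
assert not td["done"][_reset].any()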

torchrl/envs/libs/vmas.py

Lines changed: 15 additions & 4 deletions
@@ -2,9 +2,8 @@
 
 import torch
 from tensordict.tensordict import TensorDict, TensorDictBase
-
-from torchrl.data import CompositeSpec, UnboundedContinuousTensorSpec
-from torchrl.envs.common import _EnvWrapper
+from torchrl.data import CompositeSpec, DEVICE_TYPING, UnboundedContinuousTensorSpec
+from torchrl.envs.common import _EnvWrapper, EnvBase
 from torchrl.envs.libs.gym import _gym_to_torchrl_spec_transform
 from torchrl.envs.utils import _selective_unsqueeze
 
@@ -210,17 +209,23 @@ def _reset(
         self, tensordict: Optional[TensorDictBase] = None, **kwargs
     ) -> TensorDictBase:
         if tensordict is not None and "_reset" in tensordict.keys():
-            envs_to_reset = tensordict.get("_reset").any(dim=0)
+            _reset = tensordict.get("_reset")
+            envs_to_reset = _reset.any(dim=0)
             for env_index, to_reset in enumerate(envs_to_reset):
                 if to_reset:
                     self._env.reset_at(env_index)
+            done = _selective_unsqueeze(self._env.done(), batch_size=(self.num_envs,))
             obs = []
             infos = []
+            dones = []
             for agent in self.agents:
                 obs.append(self.scenario.observation(agent))
                 infos.append(self.scenario.info(agent))
+                dones.append(done.clone())
+
         else:
             obs, infos = self._env.reset(return_info=True)
+            dones = None
 
         agent_tds = []
         for i in range(self.n_agents):
@@ -237,6 +242,8 @@ def _reset(
 
             if infos is not None:
                 agent_td.set("info", agent_info)
+            if dones is not None:
+                agent_td.set("done", dones[i])
             agent_tds.append(agent_td)
 
         tensordict_out = torch.stack(agent_tds, dim=0)
@@ -324,6 +331,10 @@ def __repr__(self) -> str:
             f" batch_size={self.batch_size}, device={self.device})"
         )
 
+    def to(self, device: DEVICE_TYPING) -> EnvBase:
+        self._env.to(device)
+        return super().to(device)
+
 
 class VmasEnv(VmasWrapper):
     """Vmas environment wrapper.
