[Feature] Added batch_locked attribute in EnvBase (#399)

nicolas-dufour · web-flow · commit ef1bf20b4762 · 2022-09-16T13:47:37.000+01:00
diff --git a/test/mocking_classes.py b/test/mocking_classes.py
@@ -126,6 +126,79 @@ def rand_step(self, tensordict: Optional[TensorDictBase] = None) -> TensorDictBa
         return self.step(tensordict)
 
 
+class MockBatchedLockedEnv(EnvBase):
+    """Mocks an env whose batch_size defines the size of the output tensordict"""
+
+    def __init__(self, device, batch_size=None):
+        super(MockBatchedLockedEnv, self).__init__(device=device, batch_size=batch_size)
+        self.action_spec = NdUnboundedContinuousTensorSpec((1,))
+        self.input_spec = CompositeSpec(
+            action=NdUnboundedContinuousTensorSpec((1,)),
+            observation=NdUnboundedContinuousTensorSpec((1,)),
+        )
+        self.observation_spec = CompositeSpec(
+            next_observation=NdUnboundedContinuousTensorSpec((1,))
+        )
+        self.reward_spec = NdUnboundedContinuousTensorSpec((1,))
+        self.counter = 0
+
+    set_seed = MockSerialEnv.set_seed
+    rand_step = MockSerialEnv.rand_step
+
+    def _step(self, tensordict):
+        self.counter += 1
+        # We use tensordict.batch_size instead of self.batch_size since this method will also be used by MockBatchedUnLockedEnv
+        n = (
+            torch.full(tensordict.batch_size, self.counter)
+            .to(self.device)
+            .to(torch.get_default_dtype())
+        )
+        done = self.counter >= self.max_val
+        done = torch.full(
+            tensordict.batch_size, done, dtype=torch.bool, device=self.device
+        )
+
+        return TensorDict(
+            {"reward": n, "done": done, "next_observation": n}, tensordict.batch_size
+        )
+
+    def _reset(self, tensordict: TensorDictBase, **kwargs) -> TensorDictBase:
+        self.max_val = max(self.counter + 100, self.counter * 2)
+        if tensordict is None:
+            batch_size = self.batch_size
+        else:
+            batch_size = tensordict.batch_size
+
+        n = (
+            torch.full(batch_size, self.counter)
+            .to(self.device)
+            .to(torch.get_default_dtype())
+        )
+        done = self.counter >= self.max_val
+        done = torch.full(batch_size, done, dtype=torch.bool, device=self.device)
+
+        return TensorDict(
+            {"reward": n, "done": done, "next_observation": n}, batch_size
+        )
+
+
+class MockBatchedUnLockedEnv(MockBatchedLockedEnv):
+    """Mocks an env whose batch_size does not define the size of the output tensordict.
+
+    The size of the output tensordict is defined by the input tensordict itself.
+
+    """
+
+    def __init__(self, device, batch_size=None):
+        super(MockBatchedUnLockedEnv, self).__init__(
+            batch_size=batch_size, device=device
+        )
+
+    @classmethod
+    def __new__(cls, *args, **kwargs):
+        return super().__new__(cls, *args, _batch_locked=False, **kwargs)
+
+
 class DiscreteActionVecMockEnv(_MockEnv):
     size = 7
     observation_spec = CompositeSpec(
diff --git a/test/test_env.py b/test/test_env.py
@@ -16,6 +16,8 @@
     DiscreteActionVecMockEnv,
     MockSerialEnv,
     DiscreteActionConvMockEnv,
+    MockBatchedLockedEnv,
+    MockBatchedUnLockedEnv,
 )
 from scipy.stats import chisquare
 from torch import nn
@@ -992,6 +994,58 @@ def test_steptensordict(
         assert out is next_tensordict
 
 
+@pytest.mark.parametrize("device", get_available_devices())
+def test_batch_locked(device):
+    env = MockBatchedLockedEnv(device)
+    assert env.batch_locked
+
+    with pytest.raises(RuntimeError, match="batch_locked is a read-only property"):
+        env.batch_locked = False
+    td = env.reset()
+    td["action"] = env.action_spec.rand(env.batch_size)
+    td_expanded = td.expand(2).clone()
+    td = env.step(td)
+
+    with pytest.raises(
+        RuntimeError, match="Expected a tensordict with shape==env.shape, "
+    ):
+        env.step(td_expanded)
+
+
+@pytest.mark.parametrize("device", get_available_devices())
+def test_batch_unlocked(device):
+    env = MockBatchedUnLockedEnv(device)
+    assert not env.batch_locked
+
+    with pytest.raises(RuntimeError, match="batch_locked is a read-only property"):
+        env.batch_locked = False
+    td = env.reset()
+    td["action"] = env.action_spec.rand(env.batch_size)
+    td_expanded = td.expand(2).clone()
+    td = env.step(td)
+
+    env.step(td_expanded)
+
+
+@pytest.mark.parametrize("device", get_available_devices())
+def test_batch_unlocked_with_batch_size(device):
+    env = MockBatchedUnLockedEnv(device, batch_size=torch.Size([2]))
+    assert not env.batch_locked
+
+    with pytest.raises(RuntimeError, match="batch_locked is a read-only property"):
+        env.batch_locked = False
+
+    td = env.reset()
+    td["action"] = env.action_spec.rand(env.batch_size)
+    td_expanded = td.expand(2, 2).reshape(-1).to_tensordict()
+    td = env.step(td)
+
+    with pytest.raises(
+        RuntimeError, match="Expected a tensordict with shape==env.shape, "
+    ):
+        env.step(td_expanded)
+
+
 if __name__ == "__main__":
     args, unknown = argparse.ArgumentParser().parse_known_args()
     pytest.main([__file__, "--capture", "no", "--exitfirst"] + unknown)
diff --git a/test/test_transforms.py b/test/test_transforms.py
@@ -8,7 +8,12 @@
 import pytest
 import torch
 from _utils_internal import get_available_devices
-from mocking_classes import ContinuousActionVecMockEnv, DiscreteActionConvMockEnvNumpy
+from mocking_classes import (
+    ContinuousActionVecMockEnv,
+    DiscreteActionConvMockEnvNumpy,
+    MockBatchedLockedEnv,
+    MockBatchedUnLockedEnv,
+)
 from torch import Tensor
 from torch import multiprocessing as mp
 from torchrl import prod
@@ -35,6 +40,7 @@
     RewardScaling,
     BinarizeReward,
     R3MTransform,
+    RewardClipping,
 )
 from torchrl.envs.libs.gym import _has_gym, GymEnv
 from torchrl.envs.transforms import VecNorm, TransformedEnv
@@ -1365,6 +1371,75 @@ def test_r3m_spec_against_real(self, model, tensor_pixels_key, device):
         assert set(expected_keys) == set(transformed_env.rollout(3).keys())
 
 
+@pytest.mark.parametrize("device", get_available_devices())
+def test_batch_locked_transformed(device):
+    env = TransformedEnv(
+        MockBatchedLockedEnv(device),
+        Compose(
+            ObservationNorm(keys_in=["next_observation"], loc=0.5, scale=1.1),
+            RewardClipping(0, 0.1),
+        ),
+    )
+    assert env.batch_locked
+
+    with pytest.raises(RuntimeError, match="batch_locked is a read-only property"):
+        env.batch_locked = False
+    td = env.reset()
+    td["action"] = env.action_spec.rand(env.batch_size)
+    td_expanded = td.expand(2).clone()
+    td = env.step(td)
+
+    with pytest.raises(
+        RuntimeError, match="Expected a tensordict with shape==env.shape, "
+    ):
+        env.step(td_expanded)
+
+
+@pytest.mark.parametrize("device", get_available_devices())
+def test_batch_unlocked_transformed(device):
+    env = TransformedEnv(
+        MockBatchedUnLockedEnv(device),
+        Compose(
+            ObservationNorm(keys_in=["next_observation"], loc=0.5, scale=1.1),
+            RewardClipping(0, 0.1),
+        ),
+    )
+    assert not env.batch_locked
+
+    with pytest.raises(RuntimeError, match="batch_locked is a read-only property"):
+        env.batch_locked = False
+    td = env.reset()
+    td["action"] = env.action_spec.rand(env.batch_size)
+    td_expanded = td.expand(2).clone()
+    td = env.step(td)
+    env.step(td_expanded)
+
+
+@pytest.mark.parametrize("device", get_available_devices())
+def test_batch_unlocked_with_batch_size_transformed(device):
+    env = TransformedEnv(
+        MockBatchedUnLockedEnv(device, batch_size=torch.Size([2])),
+        Compose(
+            ObservationNorm(keys_in=["next_observation"], loc=0.5, scale=1.1),
+            RewardClipping(0, 0.1),
+        ),
+    )
+    assert not env.batch_locked
+
+    with pytest.raises(RuntimeError, match="batch_locked is a read-only property"):
+        env.batch_locked = False
+
+    td = env.reset()
+    td["action"] = env.action_spec.rand(env.batch_size)
+    td_expanded = td.expand(2, 2).reshape(-1).to_tensordict()
+    td = env.step(td)
+
+    with pytest.raises(
+        RuntimeError, match="Expected a tensordict with shape==env.shape, "
+    ):
+        env.step(td_expanded)
+
+
 if __name__ == "__main__":
     args, unknown = argparse.ArgumentParser().parse_known_args()
     pytest.main([__file__, "--capture", "no", "--exitfirst"] + unknown)
diff --git a/torchrl/envs/common.py b/torchrl/envs/common.py
@@ -49,12 +49,14 @@ def __init__(
         batch_size: torch.Size,
         env_str: str,
         device: torch.device,
+        batch_locked: bool = True,
     ):
         self.tensordict = tensordict
         self.specs = specs
         self.batch_size = batch_size
         self.env_str = env_str
         self.device = device
+        self.batch_locked = batch_locked
 
     @staticmethod
     def build_metadata_from_env(env) -> EnvMetaData:
@@ -64,19 +66,27 @@ def build_metadata_from_env(env) -> EnvMetaData:
         batch_size = env.batch_size
         env_str = str(env)
         device = env.device
-        return EnvMetaData(tensordict, specs, batch_size, env_str, device)
+        batch_locked = env.batch_locked
+        return EnvMetaData(tensordict, specs, batch_size, env_str, device, batch_locked)
 
     def expand(self, *size: int) -> EnvMetaData:
         tensordict = self.tensordict.expand(*size).to_tensordict()
         batch_size = torch.Size([*size])
         return EnvMetaData(
-            tensordict, self.specs, batch_size, self.env_str, self.device
+            tensordict,
+            self.specs,
+            batch_size,
+            self.env_str,
+            self.device,
+            self.batch_locked,
         )
 
     def to(self, device: DEVICE_TYPING) -> EnvMetaData:
         tensordict = self.tensordict.to(device)
         specs = self.specs.to(device)
-        return EnvMetaData(tensordict, specs, self.batch_size, self.env_str, device)
+        return EnvMetaData(
+            tensordict, specs, self.batch_size, self.env_str, device, self.batch_locked
+        )
 
     def __setstate__(self, state):
         state["tensordict"] = state["tensordict"].to_tensordict().to(state["device"])
@@ -218,10 +228,24 @@ def __init__(
             self.batch_size = torch.Size([])
 
     @classmethod
-    def __new__(cls, *args, **kwargs):
+    def __new__(cls, *args, _batch_locked=True, **kwargs):
         cls._inplace_update = True
+        cls._batch_locked = _batch_locked
         return super().__new__(cls)
 
+    @property
+    def batch_locked(self) -> bool:
+        """
+        Whether the environment can be used with a batch size different from the one it was initialized with or not.
+        If True (or if the env was built with a non-empty batch size), the env needs to be used with a tensordict having the same batch size as the env.
+        batch_locked is an immutable property.
+        """
+        return self._batch_locked
+
+    @batch_locked.setter
+    def batch_locked(self, value: bool) -> None:
+        raise RuntimeError("batch_locked is a read-only property")
+
     @property
     def action_spec(self) -> TensorSpec:
         return self._action_spec
@@ -272,6 +296,8 @@ def step(self, tensordict: TensorDictBase) -> TensorDictBase:
         """
 
         # sanity check
+        self._assert_tensordict_shape(tensordict)
+
         if tensordict.get("action").dtype is not self.action_spec.dtype:
             raise TypeError(
                 f"expected action.dtype to be {self.action_spec.dtype} "
@@ -408,7 +434,9 @@ def set_state(self):
         raise NotImplementedError
 
     def _assert_tensordict_shape(self, tensordict: TensorDictBase) -> None:
-        if tensordict.batch_size != self.batch_size:
+        if tensordict.batch_size != self.batch_size and (
+            self.batch_locked or self.batch_size != torch.Size([])
+        ):
             raise RuntimeError(
                 f"Expected a tensordict with shape==env.shape, "
                 f"got {tensordict.batch_size} and {self.batch_size}"
@@ -531,7 +559,9 @@ def policy(td):
         else:
             raise Exception("reset env before calling rollout!")
 
-        out_td = torch.stack(tensordicts, len(self.batch_size))
+        batch_size = self.batch_size if tensordict is None else tensordict.batch_size
+
+        out_td = torch.stack(tensordicts, len(batch_size))
         if return_contiguous:
             return out_td.contiguous()
         return out_td
diff --git a/torchrl/envs/gym_like.py b/torchrl/envs/gym_like.py
@@ -78,7 +78,7 @@ class GymLikeEnv(_EnvWrapper):
     @classmethod
     def __new__(cls, *args, **kwargs):
         cls._info_dict_reader = None
-        return super().__new__(cls, *args, **kwargs)
+        return super().__new__(cls, *args, _batch_locked=True, **kwargs)
 
     def _step(self, tensordict: TensorDictBase) -> TensorDictBase:
         action = tensordict.get("action")
diff --git a/torchrl/envs/transforms/transforms.py b/torchrl/envs/transforms/transforms.py
@@ -312,6 +312,7 @@ def __init__(
         device = kwargs["device"]
         super().__init__(**kwargs)
         self._set_env(env, device)
+        self._inplace_update = env._inplace_update
         if transform is None:
             transform = Compose()
             transform.set_parent(self)
@@ -328,6 +329,11 @@ def __init__(
         self._observation_spec = None
         self.batch_size = self.base_env.batch_size
 
+    def __new__(cls, env, *args, **kwargs):
+        return super().__new__(
+            cls, env, *args, _batch_locked=env.batch_locked, **kwargs
+        )
+
     def _set_env(self, env: EnvBase, device) -> None:
         self.base_env = env.to(device)
         # updates need not be inplace, as transforms may modify values out-place
diff --git a/torchrl/envs/vec_env.py b/torchrl/envs/vec_env.py
@@ -280,6 +280,7 @@ def _set_properties(self):
             self._dummy_env_str = self.meta_data.env_str
             self._device = self.meta_data.device
             self._env_tensordict = self.meta_data.tensordict
+            self._batch_locked = self.meta_data.batch_locked
         else:
             self._batch_size = torch.Size(
                 [self.num_workers, *self.meta_data[0].batch_size]
@@ -300,6 +301,7 @@ def _set_properties(self):
             self._env_tensordict = torch.stack(
                 [meta_data.tensordict for meta_data in self.meta_data], 0
             )
+            self._batch_locked = self.meta_data[0].batch_locked
 
     def state_dict(self) -> OrderedDict:
         raise NotImplementedError