@@ -1375,6 +1375,7 @@ def init_stats(
         reduce_dim: Union[int, Tuple[int]] = 0,
         cat_dim: Optional[int] = None,
         key: Optional[str] = None,
+        keep_dims: Optional[Tuple[int]] = None,
     ) -> None:
         """Initializes the loc and scale stats of the parent environment.
@@ -1394,6 +1395,10 @@ def init_stats(
             key (str, optional): if provided, the summary statistics will be
                 retrieved from that key in the resulting tensordicts.
                 Otherwise, the first key in :obj:`ObservationNorm.in_keys` will be used.
+            keep_dims (tuple of int, optional): the dimensions to keep in the loc and scale.
+                For instance, one may want the location and scale to have shape [C, 1, 1]
+                when normalizing a 3D tensor over the last two dimensions, but not the
+                third. Defaults to None.
 
         """
         if cat_dim is None:
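As a quick illustration of the shapes this argument documents (a pure-torch sketch, not `ObservationNorm` itself; the tensor names are made up):

```python
import torch

# Stats over a [C, H, W] observation, reduced over H and W but keeping
# those dimensions as size 1 -- the [C, 1, 1] case from the docstring.
data = torch.randn(3, 32, 32)
loc = data.mean(dim=(1, 2), keepdim=True)   # shape [3, 1, 1]
scale = data.std(dim=(1, 2), keepdim=True)  # shape [3, 1, 1]
assert loc.shape == scale.shape == (3, 1, 1)
```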
@@ -1440,12 +1445,23 @@ def raise_initialization_exception(module):
             data.append(tensordict.get(key))
 
         data = torch.cat(data, cat_dim)
-        loc = data.mean(reduce_dim)
-        scale = data.std(reduce_dim)
+        if isinstance(reduce_dim, int):
+            reduce_dim = [reduce_dim]
+        if keep_dims is not None:
+            if not all(k in reduce_dim for k in keep_dims):
+                raise ValueError("keep_dims elements must be part of reduce_dim list.")
+        else:
+            keep_dims = []
+        loc = data.mean(reduce_dim, keepdim=True)
+        scale = data.std(reduce_dim, keepdim=True)
+        for r in sorted(reduce_dim, reverse=True):
+            if r not in keep_dims:
+                loc = loc.squeeze(r)
+                scale = scale.squeeze(r)
 
         if not self.standard_normal:
-            loc = loc / scale
-            scale = 1 / scale
+            scale = 1 / scale.clamp_min(self.eps)
+            loc = -loc * scale
 
         if not torch.isfinite(loc).all():
             raise RuntimeError("Non-finite values found in loc")
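The `standard_normal=False` branch is a sign fix as well as an eps guard: assuming the transform applies `obs * scale + loc` in that mode, the old parameters produced `(obs + mean) / std`, while the rewritten ones recover `(obs - mean) / std`. A minimal check of the new algebra:

```python
import torch

eps = 1e-6  # stand-in for self.eps
obs = torch.randn(1000)
mean, std = obs.mean(), obs.std()

# New parameterization: scale = 1/std (clamped), loc = -mean * scale.
scale = 1 / std.clamp_min(eps)
loc = -mean * scale

# obs * scale + loc is then the standard z-score normalization.
torch.testing.assert_close(obs * scale + loc, (obs - mean) / std)
```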
@@ -2516,9 +2532,22 @@ def reset(self, tensordict: TensorDictBase) -> TensorDictBase:
         """Resets episode rewards."""
         # Non-batched environments
         if len(tensordict.batch_size) < 1 or tensordict.batch_size[0] == 1:
-            for out_key in self.out_keys:
+            for in_key, out_key in zip(self.in_keys, self.out_keys):
                 if out_key in tensordict.keys():
-                    tensordict[out_key] = 0.0
+                    tensordict[out_key] = torch.zeros_like(tensordict[out_key])
+                elif in_key == "reward":
+                    tensordict[out_key] = self.parent.reward_spec.zero()
+                else:
+                    try:
+                        tensordict[out_key] = self.parent.observation_spec[
+                            in_key
+                        ].zero()
+                    except KeyError as err:
+                        raise KeyError(
+                            f"The key {in_key} was not found in the parent "
+                            f"observation_spec with keys "
+                            f"{list(self.parent.observation_spec.keys())}."
+                        ) from err
 
         # Batched environments
         else:
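The non-batched branch now falls back to allocating zeros from the parent's specs when the accumulated key is absent. A rough stand-in for that fallback chain, using a plain dict and tensors in place of the tensordict and specs (all names here are illustrative):

```python
import torch

def reset_entry(td: dict, in_key: str, out_key: str) -> None:
    if out_key in td:
        # Key already tracked: zero it out-of-place, preserving shape/dtype.
        td[out_key] = torch.zeros_like(td[out_key])
    elif in_key == "reward":
        # Absent key: allocate zeros, as reward_spec.zero() would.
        td[out_key] = torch.zeros(1)
    else:
        raise KeyError(f"The key {in_key} was not found")

td = {"episode_reward": torch.ones(1)}
reset_entry(td, "reward", "episode_reward")
assert (td["episode_reward"] == 0).all()
```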
@@ -2530,9 +2559,27 @@ def reset(self, tensordict: TensorDictBase) -> TensorDictBase:
                     device=tensordict.device,
                 ),
             )
-            for out_key in self.out_keys:
+            for in_key, out_key in zip(self.in_keys, self.out_keys):
                 if out_key in tensordict.keys():
-                    tensordict[out_key][_reset] = 0.0
+                    z = torch.zeros_like(tensordict[out_key])
+                    _reset = _reset.view_as(z)
+                    tensordict[out_key][_reset] = z[_reset]
+                elif in_key == "reward":
+                    # Since the episode reward is not in the tensordict, we need to allocate it
+                    # with zeros entirely (regardless of the _reset mask)
+                    z = self.parent.reward_spec.zero(self.parent.batch_size)
+                    tensordict[out_key] = z
+                else:
+                    try:
+                        tensordict[out_key] = self.parent.observation_spec[in_key].zero(
+                            self.parent.batch_size
+                        )
+                    except KeyError as err:
+                        raise KeyError(
+                            f"The key {in_key} was not found in the parent "
+                            f"observation_spec with keys "
+                            f"{list(self.parent.observation_spec.keys())}."
+                        ) from err
 
         return tensordict
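The batched branch only zeroes the rows flagged by `_reset`; the other environments keep their running sums. The masked pattern in isolation:

```python
import torch

episode_reward = torch.arange(4.0).reshape(4, 1)  # running sums per env
_reset = torch.tensor([True, False, True, False]).reshape(4, 1)

z = torch.zeros_like(episode_reward)
episode_reward[_reset] = z[_reset]  # zero only the reset environments
assert episode_reward.flatten().tolist() == [0.0, 1.0, 0.0, 3.0]
```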
@@ -2554,8 +2601,7 @@ def _call(self, tensordict: TensorDictBase) -> TensorDictBase:
                             *tensordict.shape, 1, dtype=reward.dtype, device=reward.device
                         ),
                     )
-                tensordict[out_key] += reward
-
+                tensordict[out_key] = tensordict[out_key] + reward
         return tensordict
 
     def transform_observation_spec(self, observation_spec: TensorSpec) -> TensorSpec:
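The last hunk trades the in-place `+=` for an out-of-place add. A sketch of the difference, assuming the stored tensor may be shared with another reference (e.g. a tensor also held by the input tensordict):

```python
import torch

reward = torch.ones(2, 1)

# In-place: += mutates the stored tensor, so the shared reference changes too.
shared = torch.zeros(2, 1)
store = {"episode_reward": shared}
store["episode_reward"] += reward
assert (shared == 1).all()

# Out-of-place: a fresh tensor is bound to the key; `shared` is untouched.
shared = torch.zeros(2, 1)
store = {"episode_reward": shared}
store["episode_reward"] = store["episode_reward"] + reward
assert (shared == 0).all()
```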