
Commit 09b28d3

[Feature] add standard_normal for RewardScaling (#682)
* Add standard_normal
* give attribute access
* Update standard_normal
* Update tests
* Fix tests
* Address in-place scaling of reward
* Improvise tests
1 parent 0ab52dd commit 09b28d3

2 files changed: 34 additions & 10 deletions

test/test_transforms.py

Lines changed: 15 additions & 8 deletions
@@ -1289,13 +1289,16 @@ def test_binarized_reward(self, device, batch):
     @pytest.mark.parametrize("loc", [1, 5])
     @pytest.mark.parametrize("keys", [None, ["reward_1"]])
     @pytest.mark.parametrize("device", get_available_devices())
-    def test_reward_scaling(self, batch, scale, loc, keys, device):
+    @pytest.mark.parametrize("standard_normal", [True, False])
+    def test_reward_scaling(self, batch, scale, loc, keys, device, standard_normal):
         torch.manual_seed(0)
         if keys is None:
             keys_total = set([])
         else:
             keys_total = set(keys)
-        reward_scaling = RewardScaling(in_keys=keys, scale=scale, loc=loc)
+        reward_scaling = RewardScaling(
+            in_keys=keys, scale=scale, loc=loc, standard_normal=standard_normal
+        )
         td = TensorDict(
             {
                 **{key: torch.randn(*batch, 1, device=device) for key in keys_total},
@@ -1308,13 +1311,17 @@ def test_reward_scaling(self, batch, scale, loc, keys, device):
         td_copy = td.clone()
         reward_scaling(td)
         for key in keys_total:
-            assert (td.get(key) == td_copy.get(key).mul_(scale).add_(loc)).all()
+            if standard_normal:
+                original_key = td.get(key)
+                scaled_key = (td_copy.get(key) - loc) / scale
+                torch.testing.assert_close(original_key, scaled_key)
+            else:
+                original_key = td.get(key)
+                scaled_key = td_copy.get(key) * scale + loc
+                torch.testing.assert_close(original_key, scaled_key)
         assert (td.get("dont touch") == td_copy.get("dont touch")).all()
-        if len(keys_total) == 0:
-            assert (
-                td.get("reward") == td_copy.get("reward").mul_(scale).add_(loc)
-            ).all()
-        elif len(keys_total) == 1:
+
+        if len(keys_total) == 1:
             reward_spec = UnboundedContinuousTensorSpec(device=device)
             reward_spec = reward_scaling.transform_reward_spec(reward_spec)
             assert reward_spec.shape == torch.Size([1])
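
For reference, the assertion logic the test now branches on reduces to a simple affine check in plain PyTorch. The snippet below is a minimal sketch of the two modes being distinguished; the tensor shapes and the loc/scale values are illustrative, not taken from the test suite.

import torch

loc, scale = 1.0, 5.0
reward = torch.randn(4, 1)

# Default behaviour (standard_normal=False): affine scaling, reward * scale + loc
scaled = reward * scale + loc

# New behaviour (standard_normal=True): standardization, (reward - loc) / scale
standardized = (reward - loc) / scale

# For the same loc and scale, the two modes are inverses of each other
torch.testing.assert_close(standardized * scale + loc, reward)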

torchrl/envs/transforms/transforms.py

Lines changed: 19 additions & 2 deletions
@@ -1500,6 +1500,12 @@ class RewardScaling(Transform):
     Args:
         loc (number or torch.Tensor): location of the affine transform
         scale (number or torch.Tensor): scale of the affine transform
+        standard_normal (bool, optional): if True, the transform will be
+
+            .. math::
+                reward = (reward-loc)/scale
+
+            as it is done for standardization. Default is `False`.
     """

     inplace = True
@@ -1509,10 +1515,13 @@ def __init__(
         loc: Union[float, torch.Tensor],
         scale: Union[float, torch.Tensor],
         in_keys: Optional[Sequence[str]] = None,
+        standard_normal: bool = False,
     ):
         if in_keys is None:
             in_keys = ["reward"]
         super().__init__(in_keys=in_keys)
+        self.standard_normal = standard_normal
+
         if not isinstance(loc, torch.Tensor):
             loc = torch.tensor(loc)
         if not isinstance(scale, torch.Tensor):
@@ -1522,8 +1531,16 @@
         self.register_buffer("scale", scale.clamp_min(1e-6))

     def _apply_transform(self, reward: torch.Tensor) -> torch.Tensor:
-        reward.mul_(self.scale).add_(self.loc)
-        return reward
+        if self.standard_normal:
+            loc = self.loc
+            scale = self.scale
+            reward = (reward - loc) / scale
+            return reward
+        else:
+            scale = self.scale
+            loc = self.loc
+            reward = reward * scale + loc
+            return reward

     def transform_reward_spec(self, reward_spec: TensorSpec) -> TensorSpec:
         if isinstance(reward_spec, UnboundedContinuousTensorSpec):
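
As a usage sketch of the new option: the snippet below applies RewardScaling with standard_normal=True to a TensorDict carrying a "reward" entry, mirroring the call pattern used in the test above. The import path for TensorDict is an assumption based on the torchrl layout around this commit and may differ in other versions; the loc/scale values and batch size are illustrative.

import torch
from torchrl.data import TensorDict  # assumed import path at the time of this commit
from torchrl.envs.transforms import RewardScaling

# Standardize rewards as (reward - loc) / scale instead of the default reward * scale + loc
transform = RewardScaling(loc=1.0, scale=5.0, standard_normal=True)

td = TensorDict({"reward": torch.randn(4, 1)}, batch_size=[4])
raw = td.get("reward").clone()

transform(td)  # acts on the "reward" key by default (in_keys=["reward"])

torch.testing.assert_close(td.get("reward"), (raw - 1.0) / 5.0)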
