[BugFix]: Fix additive noise (#447)

nicolas-dufour · web-flow · commit 8dceee8eee8e · 2022-09-14T19:29:42.000+02:00
diff --git a/test/test_exploration.py b/test/test_exploration.py
@@ -198,20 +198,21 @@ def test_additivegaussian_wrapper(
         )
         out_noexp = []
         out = []
-        for _ in range(n_steps):
-            tensordict_noexp = policy(tensordict.select("observation"))
-            tensordict = exploratory_policy(tensordict)
-            out.append(tensordict.clone())
-            out_noexp.append(tensordict_noexp.clone())
-            tensordict.set_("observation", torch.randn(batch, d_obs, device=device))
-        out = torch.stack(out, 0)
-        out_noexp = torch.stack(out_noexp, 0)
-        assert (out_noexp.get("action") != out.get("action")).all()
-        if spec_origin is not None:
-            assert (out.get("action") <= 1.0).all(), out.get("action").min()
-            assert (out.get("action") >= -1.0).all(), out.get("action").max()
-            if action_spec is not None:
-                assert action_spec.is_in(out.get("action"))
+        if exploratory_policy.spec is not None:
+            for _ in range(n_steps):
+                tensordict_noexp = policy(tensordict.select("observation"))
+                tensordict = exploratory_policy(tensordict)
+                out.append(tensordict.clone())
+                out_noexp.append(tensordict_noexp.clone())
+                tensordict.set_("observation", torch.randn(batch, d_obs, device=device))
+            out = torch.stack(out, 0)
+            out_noexp = torch.stack(out_noexp, 0)
+            assert (out_noexp.get("action") != out.get("action")).all()
+            if spec_origin is not None:
+                assert (out.get("action") <= 1.0).all(), out.get("action").min()
+                assert (out.get("action") >= -1.0).all(), out.get("action").max()
+                if action_spec is not None:
+                    assert action_spec.is_in(out.get("action"))
 
 
 @pytest.mark.parametrize("state_dim", [7])
diff --git a/torchrl/modules/tensordict_module/exploration.py b/torchrl/modules/tensordict_module/exploration.py
@@ -224,6 +224,7 @@ def forward(self, tensordict: TensorDictBase) -> TensorDictBase:
         tensordict = self.td_module.forward(tensordict)
         if exploration_mode() == "random" or exploration_mode() is None:
             out = tensordict.get(self.action_key)
+            out = self._add_noise(out)
             tensordict.set(self.action_key, out)
         return tensordict