[Deprecation] Remove AdditiveGaussianWrapper

Vincent Moens · Vincent Moens · commit 6c7f4fbda743 · 2025-02-04T08:34:24.000Z
ghstack-source-id: 78f248e
Pull Request resolved: #2748
diff --git a/docs/source/reference/modules.rst b/docs/source/reference/modules.rst
@@ -78,7 +78,6 @@ other cases, the action written in the tensordict is simply the network output.
     :template: rl_template_noinherit.rst
 
     AdditiveGaussianModule
-    AdditiveGaussianWrapper
     ConsistentDropoutModule
     EGreedyModule
     EGreedyWrapper
diff --git a/test/test_exploration.py b/test/test_exploration.py
@@ -35,7 +35,6 @@
 from torchrl.modules.tensordict_module.exploration import (
     _OrnsteinUhlenbeckProcess,
     AdditiveGaussianModule,
-    AdditiveGaussianWrapper,
     EGreedyModule,
     EGreedyWrapper,
     OrnsteinUhlenbeckProcessModule,
@@ -433,7 +432,7 @@ def test_no_spec_error(self, device):
 @pytest.mark.parametrize("device", get_default_devices())
 class TestAdditiveGaussian:
     @pytest.mark.parametrize("spec_origin", ["spec", "policy", None])
-    @pytest.mark.parametrize("interface", ["module", "wrapper"])
+    @pytest.mark.parametrize("interface", ["module"])
     def test_additivegaussian_sd(
         self,
         device,
@@ -475,8 +474,8 @@ def test_additivegaussian_sd(
                 default_interaction_type=InteractionType.RANDOM,
             )
             given_spec = action_spec if spec_origin == "spec" else None
-            exploratory_policy = AdditiveGaussianWrapper(
-                policy, spec=given_spec, device=device
+            exploratory_policy = TensorDictModule(
+                policy, AdditiveGaussianModule(spec=given_spec, device=device)
             )
         if spec_origin is not None:
             sigma_init = (
@@ -524,7 +523,7 @@ def test_additivegaussian_sd(
         assert abs(noisy_action.std() - sigma_end) < 1e-1
 
     @pytest.mark.parametrize("spec_origin", ["spec", "policy", None])
-    @pytest.mark.parametrize("interface", ["module", "wrapper"])
+    @pytest.mark.parametrize("interface", ["module"])
     def test_additivegaussian(
         self,
         device,
@@ -563,9 +562,7 @@ def test_additivegaussian(
                 policy, AdditiveGaussianModule(spec=given_spec).to(device)
             )
         else:
-            exploratory_policy = AdditiveGaussianWrapper(
-                policy, spec=given_spec, safe=False
-            ).to(device)
+            raise NotImplementedError
 
         tensordict = TensorDict(
             batch_size=[batch],
@@ -590,7 +587,7 @@ def test_additivegaussian(
                 assert action_spec.is_in(out.get("action"))
 
     @pytest.mark.parametrize("parallel_spec", [True, False])
-    @pytest.mark.parametrize("interface", ["module", "wrapper"])
+    @pytest.mark.parametrize("interface", ["module"])
     def test_collector(self, device, parallel_spec, interface, seed=0):
         torch.manual_seed(seed)
         env = SerialEnv(
@@ -622,7 +619,7 @@ def test_collector(self, device, parallel_spec, interface, seed=0):
                 policy, AdditiveGaussianModule(spec=action_spec).to(device)
             )
         else:
-            exploratory_policy = AdditiveGaussianWrapper(policy, safe=False)
+            raise NotImplementedError
         exploratory_policy(env.reset())
         collector = SyncDataCollector(
             create_env_fn=env,
diff --git a/torchrl/collectors/collectors.py b/torchrl/collectors/collectors.py
@@ -81,7 +81,7 @@ def cudagraph_mark_step_begin():
 INSTANTIATE_TIMEOUT = 20
 _MIN_TIMEOUT = 1e-3  # should be several orders of magnitude inferior wrt time spent collecting a trajectory
 # MAX_IDLE_COUNT is the maximum number of times a Dataloader worker can timeout with his queue.
-_MAX_IDLE_COUNT = int(os.environ.get("MAX_IDLE_COUNT", float("inf")))
+_MAX_IDLE_COUNT = int(os.environ.get("MAX_IDLE_COUNT", torch.iinfo(torch.int64).max))
 
 DEFAULT_EXPLORATION_TYPE: ExplorationType = ExplorationType.RANDOM
 
diff --git a/torchrl/modules/tensordict_module/exploration.py b/torchrl/modules/tensordict_module/exploration.py
@@ -27,7 +27,6 @@
     "EGreedyWrapper",
     "EGreedyModule",
     "AdditiveGaussianModule",
-    "AdditiveGaussianWrapper",
     "OrnsteinUhlenbeckProcessModule",
     "OrnsteinUhlenbeckProcessWrapper",
 ]
@@ -220,42 +219,7 @@ def __init__(
 
 
 class AdditiveGaussianWrapper(TensorDictModuleWrapper):
-    """Additive Gaussian PO wrapper.
-
-    Args:
-        policy (TensorDictModule): a policy.
-
-    Keyword Args:
-        sigma_init (scalar, optional): initial epsilon value.
-            default: 1.0
-        sigma_end (scalar, optional): final epsilon value.
-            default: 0.1
-        annealing_num_steps (int, optional): number of steps it will take for
-            sigma to reach the :obj:`sigma_end` value.
-        mean (:obj:`float`, optional): mean of each output element’s normal distribution.
-        std (:obj:`float`, optional): standard deviation of each output element’s normal distribution.
-        action_key (NestedKey, optional): if the policy module has more than one output key,
-            its output spec will be of type Composite. One needs to know where to
-            find the action spec.
-            Default is "action".
-        spec (TensorSpec, optional): if provided, the sampled action will be
-            projected onto the valid action space once explored. If not provided,
-            the exploration wrapper will attempt to recover it from the policy.
-        safe (boolean, optional): if False, the TensorSpec can be None. If it
-            is set to False but the spec is passed, the projection will still
-            happen.
-            Default is True.
-        device (torch.device, optional): the device where the buffers have to be stored.
-
-    .. note::
-        Once an environment has been wrapped in :class:`AdditiveGaussianWrapper`, it is
-        crucial to incorporate a call to :meth:`~.step` in the training loop
-        to update the exploration factor.
-        Since it is not easy to capture this omission no warning or exception
-        will be raised if this is ommitted!
-
-
-    """
+    """[Deprecated] Additive Gaussian PO wrapper."""
 
     def __init__(
         self,
@@ -271,105 +235,9 @@ def __init__(
         safe: Optional[bool] = True,
         device: torch.device | None = None,
     ):
-        warnings.warn(
-            "AdditiveGaussianWrapper is deprecated and will be removed "
-            "in v0.7. Please use torchrl.modules.AdditiveGaussianModule "
-            "instead.",
-            category=DeprecationWarning,
-        )
-        if device is None and hasattr(policy, "parameters"):
-            for p in policy.parameters():
-                device = p.device
-                break
-
-        super().__init__(policy)
-        if sigma_end > sigma_init:
-            raise RuntimeError("sigma should decrease over time or be constant")
-        self.register_buffer("sigma_init", torch.tensor(sigma_init, device=device))
-        self.register_buffer("sigma_end", torch.tensor(sigma_end, device=device))
-        self.annealing_num_steps = annealing_num_steps
-        self.register_buffer("mean", torch.tensor(mean, device=device))
-        self.register_buffer("std", torch.tensor(std, device=device))
-        self.register_buffer(
-            "sigma", torch.tensor(sigma_init, dtype=torch.float32, device=device)
+        raise RuntimeError(
+            "This module has been removed from TorchRL. Please use torchrl.modules.AdditiveGaussianModule instead."
         )
-        self.action_key = action_key
-        self.out_keys = list(self.td_module.out_keys)
-        if action_key not in self.out_keys:
-            raise RuntimeError(
-                f"The action key {action_key} was not found in the td_module out_keys {self.td_module.out_keys}."
-            )
-        if spec is not None:
-            if not isinstance(spec, Composite) and len(self.out_keys) >= 1:
-                spec = Composite({action_key: spec}, shape=spec.shape[:-1])
-            self._spec = spec
-        elif hasattr(self.td_module, "_spec"):
-            self._spec = self.td_module._spec.clone()
-            if action_key not in self._spec.keys(True, True):
-                self._spec[action_key] = None
-        elif hasattr(self.td_module, "spec"):
-            self._spec = self.td_module.spec.clone()
-            if action_key not in self._spec.keys(True, True):
-                self._spec[action_key] = None
-        else:
-            self._spec = Composite({key: None for key in policy.out_keys})
-
-        self.safe = safe
-        if self.safe:
-            self.register_forward_hook(_forward_hook_safe_action)
-
-    @property
-    def spec(self):
-        return self._spec
-
-    def step(self, frames: int = 1) -> None:
-        """A step of sigma decay.
-
-        After self.annealing_num_steps, this function is a no-op.
-
-        Args:
-            frames (int): number of frames since last step.
-
-        """
-        for _ in range(frames):
-            self.sigma.data.copy_(
-                torch.maximum(
-                    self.sigma_end,
-                    self.sigma
-                    - (self.sigma_init - self.sigma_end) / self.annealing_num_steps,
-                ),
-            )
-
-    def _add_noise(self, action: torch.Tensor) -> torch.Tensor:
-        sigma = self.sigma
-        mean = self.mean.expand(action.shape)
-        std = self.std.expand(action.shape)
-        if not mean.dtype.is_floating_point:
-            mean = mean.to(torch.get_default_dtype())
-        if not std.dtype.is_floating_point:
-            std = std.to(torch.get_default_dtype())
-        noise = torch.normal(mean=mean, std=std)
-        if noise.device != action.device:
-            noise = noise.to(action.device)
-        action = action + noise * sigma
-        spec = self.spec
-        spec = spec[self.action_key]
-        if spec is not None:
-            action = spec.project(action)
-        elif self.safe:
-            raise RuntimeError(
-                "the action spec must be provided to AdditiveGaussianWrapper unless "
-                "the `safe` keyword argument is turned off at initialization."
-            )
-        return action
-
-    def forward(self, tensordict: TensorDictBase) -> TensorDictBase:
-        tensordict = self.td_module.forward(tensordict)
-        if exploration_type() is ExplorationType.RANDOM or exploration_type() is None:
-            out = tensordict.get(self.action_key)
-            out = self._add_noise(out)
-            tensordict.set(self.action_key, out)
-        return tensordict
 
 
 class AdditiveGaussianModule(TensorDictModuleBase):