@@ -370,94 +370,7 @@ def forward(self, tensordict: TensorDictBase) -> TensorDictBase:
 
 
 class OrnsteinUhlenbeckProcessWrapper(TensorDictModuleWrapper):
-    r"""Ornstein-Uhlenbeck exploration policy wrapper.
-
-    Presented in "CONTINUOUS CONTROL WITH DEEP REINFORCEMENT LEARNING", https://arxiv.org/pdf/1509.02971.pdf.
-
-    The OU exploration is to be used with continuous control policies and introduces an auto-correlated exploration
-    noise. This enables a sort of 'structured' exploration.
-
-    Noise equation:
-
-    .. math::
-        noise_t = noise_{t-1} + \theta * (\mu - noise_{t-1}) * dt + \sigma_t * \sqrt{dt} * W
-
-    Sigma equation:
-
-    .. math::
-        \sigma_t = \max(\sigma^{min}, \sigma - (\sigma - \sigma^{min}) / n^{\text{steps annealing}} * n^{\text{steps}})
-
-    To keep track of the steps and noise from sample to sample, the :obj:`"ou_prev_noise{id}"` and :obj:`"ou_steps{id}"` keys
-    will be written in the input/output tensordict. It is expected that the tensordict will be zeroed at reset,
-    indicating that a new trajectory is being collected. If not, and if the same tensordict is used for consecutive
-    trajectories, the step count will keep on increasing across rollouts. Note that the collector classes take care of
-    zeroing the tensordict at reset time.
-
-    .. note::
-        Once an environment has been wrapped in :class:`OrnsteinUhlenbeckProcessWrapper`, it is
-        crucial to incorporate a call to :meth:`~.step` in the training loop
-        to update the exploration factor.
-        Since it is not easy to capture this omission, no warning or exception
-        will be raised if it is omitted!
-
-    Args:
-        policy (TensorDictModule): a policy
-
-    Keyword Args:
-        eps_init (scalar): initial epsilon value, determining the amount of noise to be added.
-            default: 1.0
-        eps_end (scalar): final epsilon value, determining the amount of noise to be added.
-            default: 0.1
-        annealing_num_steps (int): number of steps it will take for epsilon to reach the eps_end value.
-            default: 1000
-        theta (scalar): theta factor in the noise equation.
-            default: 0.15
-        mu (scalar): OU average (mu in the noise equation).
-            default: 0.0
-        sigma (scalar): sigma value in the sigma equation.
-            default: 0.2
-        dt (scalar): dt in the noise equation.
-            default: 0.01
-        x0 (Tensor, ndarray, optional): initial value of the process.
-            default: 0.0
-        sigma_min (number, optional): sigma_min in the sigma equation.
-            default: None
-        n_steps_annealing (int): number of steps for the sigma annealing.
-            default: 1000
-        action_key (NestedKey, optional): key of the action to be modified.
-            default: "action"
-        is_init_key (NestedKey, optional): key where to find the is_init flag used to reset the noise steps.
-            default: "is_init"
-        spec (TensorSpec, optional): if provided, the sampled action will be
-            projected onto the valid action space once explored. If not provided,
-            the exploration wrapper will attempt to recover it from the policy.
-        safe (bool): if ``True``, actions that are out of bounds given the action specs will be projected in the space
-            given the :obj:`TensorSpec.project` heuristic.
-            default: True
-        device (torch.device, optional): the device where the buffers have to be stored.
-
-    Examples:
-        >>> import torch
-        >>> from tensordict import TensorDict
-        >>> from torchrl.data import Bounded
-        >>> from torchrl.modules import OrnsteinUhlenbeckProcessWrapper, Actor
-        >>> torch.manual_seed(0)
-        >>> spec = Bounded(-1, 1, torch.Size([4]))
-        >>> module = torch.nn.Linear(4, 4, bias=False)
-        >>> policy = Actor(module=module, spec=spec)
-        >>> explorative_policy = OrnsteinUhlenbeckProcessWrapper(policy)
-        >>> td = TensorDict({"observation": torch.zeros(10, 4)}, batch_size=[10])
-        >>> print(explorative_policy(td))
-        TensorDict(
-            fields={
-                _ou_prev_noise: Tensor(torch.Size([10, 4]), dtype=torch.float32),
-                _ou_steps: Tensor(torch.Size([10, 1]), dtype=torch.int64),
-                action: Tensor(torch.Size([10, 4]), dtype=torch.float32),
-                observation: Tensor(torch.Size([10, 4]), dtype=torch.float32)},
-            batch_size=torch.Size([10]),
-            device=None,
-            is_shared=False)
-    """
+    """[Deprecated] Ornstein-Uhlenbeck exploration policy wrapper."""
 
     def __init__(
         self,
@@ -480,119 +393,9 @@ def __init__(
         key: Optional[NestedKey] = None,
         device: torch.device | None = None,
     ):
-        warnings.warn(
-            "OrnsteinUhlenbeckProcessWrapper is deprecated and will be removed "
-            "in v0.7. Please use torchrl.modules.OrnsteinUhlenbeckProcessModule "
-            "instead.",
-            category=DeprecationWarning,
-        )
-        if device is None and hasattr(policy, "parameters"):
-            for p in policy.parameters():
-                device = p.device
-                break
-        if key is not None:
-            action_key = key
-            warnings.warn(
-                f"the 'key' keyword argument of {type(self)} has been renamed 'action_key'. The 'key' entry will be deprecated soon."
-            )
-        super().__init__(policy)
-        self.ou = _OrnsteinUhlenbeckProcess(
-            theta=theta,
-            mu=mu,
-            sigma=sigma,
-            dt=dt,
-            x0=x0,
-            sigma_min=sigma_min,
-            n_steps_annealing=n_steps_annealing,
-            key=action_key,
-            device=device,
-        )
-        self.register_buffer("eps_init", torch.tensor(eps_init, device=device))
-        self.register_buffer("eps_end", torch.tensor(eps_end, device=device))
-        if self.eps_end > self.eps_init:
-            raise ValueError(
-                "eps should decrease over time or be constant, "
-                f"got eps_init={eps_init} and eps_end={eps_end}"
-            )
-        self.annealing_num_steps = annealing_num_steps
-        self.register_buffer(
-            "eps", torch.tensor(eps_init, dtype=torch.float32, device=device)
+        raise RuntimeError(
+            "OrnsteinUhlenbeckProcessWrapper has been removed. Please use torchrl.modules.OrnsteinUhlenbeckProcessModule instead."
         )
-        self.out_keys = list(self.td_module.out_keys) + self.ou.out_keys
-        self.is_init_key = is_init_key
-        noise_key = self.ou.noise_key
-        steps_key = self.ou.steps_key
-
-        if spec is not None:
-            if not isinstance(spec, Composite) and len(self.out_keys) >= 1:
-                spec = Composite({action_key: spec}, shape=spec.shape[:-1])
-            self._spec = spec
-        elif hasattr(self.td_module, "_spec"):
-            self._spec = self.td_module._spec.clone()
-            if action_key not in self._spec.keys(True, True):
-                self._spec[action_key] = None
-        elif hasattr(self.td_module, "spec"):
-            self._spec = self.td_module.spec.clone()
-            if action_key not in self._spec.keys(True, True):
-                self._spec[action_key] = None
-        else:
-            self._spec = Composite({key: None for key in policy.out_keys})
-        ou_specs = {
-            noise_key: None,
-            steps_key: None,
-        }
-        self._spec.update(ou_specs)
-        if len(set(self.out_keys)) != len(self.out_keys):
-            raise RuntimeError(f"Got multiple identical output keys: {self.out_keys}")
-        self.safe = safe
-        if self.safe:
-            self.register_forward_hook(_forward_hook_safe_action)
-
-    @property
-    def spec(self):
-        return self._spec
-
-    def step(self, frames: int = 1) -> None:
-        """Updates the eps noise factor.
-
-        Args:
-            frames (int): number of frames of the current batch (corresponding to the number of updates to be made).
-
-        """
-        for _ in range(frames):
-            if self.annealing_num_steps > 0:
-                self.eps.data.copy_(
-                    torch.maximum(
-                        self.eps_end,
-                        (
-                            self.eps
-                            - (self.eps_init - self.eps_end) / self.annealing_num_steps
-                        ),
-                    )
-                )
-            else:
-                raise ValueError(
-                    f"{self.__class__.__name__}.step() called when "
-                    f"self.annealing_num_steps={self.annealing_num_steps}. Expected a strictly positive "
-                    f"number of frames."
-                )
-
-    def forward(self, tensordict: TensorDictBase) -> TensorDictBase:
-        tensordict = super().forward(tensordict)
-        if exploration_type() == ExplorationType.RANDOM or exploration_type() is None:
-            is_init = tensordict.get(self.is_init_key, None)
-            if is_init is None:
-                warnings.warn(
-                    f"The tensordict passed to {self.__class__.__name__} appears to be "
-                    f"missing the '{self.is_init_key}' entry. This entry is used to "
-                    f"reset the noise at the beginning of a trajectory, without it "
-                    f"the behavior of this exploration method is undefined. "
-                    f"This is allowed for BC compatibility purposes but it will be deprecated soon! "
-                    f"To create a '{self.is_init_key}' entry, simply append a torchrl.envs.InitTracker "
-                    f"transform to your environment with `env = TransformedEnv(env, InitTracker())`."
-                )
-            tensordict = self.ou.add_sample(tensordict, self.eps, is_init=is_init)
-        return tensordict
 
 
 class OrnsteinUhlenbeckProcessModule(TensorDictModuleBase):
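Note: since the constructor above now only raises a RuntimeError pointing users at OrnsteinUhlenbeckProcessModule, a short migration sketch may help. The snippet below mirrors the Examples block removed in this diff, swapping the wrapper for the module-based API; it assumes that OrnsteinUhlenbeckProcessModule takes the action spec as its first argument and is composed with the policy via tensordict.nn.TensorDictSequential, as in the torchrl documentation. It is a sketch, not part of this change.

    # Migration sketch (assumed API, not part of this diff): the OU noise is now a
    # standalone module appended after the policy instead of a wrapper around it.
    import torch
    from tensordict import TensorDict
    from tensordict.nn import TensorDictSequential
    from torchrl.data import Bounded
    from torchrl.modules import Actor, OrnsteinUhlenbeckProcessModule

    torch.manual_seed(0)
    spec = Bounded(-1, 1, torch.Size([4]))
    module = torch.nn.Linear(4, 4, bias=False)
    policy = Actor(module=module, spec=spec)
    # Compose the deterministic policy with the exploration module.
    ou = OrnsteinUhlenbeckProcessModule(spec=spec)
    explorative_policy = TensorDictSequential(policy, ou)
    td = TensorDict({"observation": torch.zeros(10, 4)}, batch_size=[10])
    print(explorative_policy(td))
    # As with the removed wrapper, ou.step() should still be called in the
    # training loop to anneal the exploration factor.

As with the wrapper, the noise-tracking entries are written into the tensordict, so the tensordict is still expected to be zeroed at reset (e.g. by the collector classes or an InitTracker transform).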