[Deprecation] Softly change default behavior of auto_unwrap

Vincent Moens · Vincent Moens · commit 2046bc536c0c · 2025-02-20T21:24:36.000Z
ghstack-source-id: c28c11e Pull Request resolved: #2793
diff --git a/docs/source/reference/utils.rst b/docs/source/reference/utils.rst
@@ -1,4 +1,4 @@
-.. currentmodule:: torchrl._utils
+.. currentmodule:: torchrl
 
 torchrl._utils package
 ====================
@@ -11,3 +11,5 @@ Set of utility methods that are used internally by the library.
     :template: rl_template.rst
 
     implement_for
+    set_auto_unwrap_transformed_env
+    auto_unwrap_transformed_env
diff --git a/test/test_transforms.py b/test/test_transforms.py
@@ -33,7 +33,7 @@
 from tensordict.nn import TensorDictSequential
 from tensordict.utils import _unravel_key_to_tuple, assert_allclose_td
 from torch import multiprocessing as mp, nn, Tensor
-from torchrl._utils import _replace_last, prod
+from torchrl._utils import _replace_last, prod, set_auto_unwrap_transformed_env
 
 from torchrl.collectors import MultiSyncDataCollector
 from torchrl.data import (
@@ -9846,6 +9846,40 @@ def test_added_transforms_are_in_eval_mode():
 
 
 class TestTransformedEnv:
+    @pytest.mark.filterwarnings("error")
+    def test_nested_transformed_env(self):
+        base_env = ContinuousActionVecMockEnv()
+        t1 = RewardScaling(0, 1)
+        t2 = RewardScaling(0, 2)
+
+        def test_unwrap():
+            env = TransformedEnv(TransformedEnv(base_env, t1), t2)
+            assert env.base_env is base_env
+            assert isinstance(env.transform, Compose)
+            children = list(env.transform.transforms.children())
+            assert len(children) == 2
+            assert children[0].scale == 1
+            assert children[1].scale == 2
+
+        def test_wrap(auto_unwrap=None):
+            env = TransformedEnv(
+                TransformedEnv(base_env, t1), t2, auto_unwrap=auto_unwrap
+            )
+            assert env.base_env is not base_env
+            assert isinstance(env.base_env.transform, RewardScaling)
+            assert isinstance(env.transform, RewardScaling)
+
+        with pytest.warns(FutureWarning):
+            test_unwrap()
+
+        test_wrap(False)
+
+        with set_auto_unwrap_transformed_env(True):
+            test_unwrap()
+
+        with set_auto_unwrap_transformed_env(False):
+            test_wrap()
+
     def test_attr_error(self):
         class BuggyTransform(Transform):
             def transform_observation_spec(
@@ -9936,20 +9970,6 @@ def test_allow_done_after_reset(self):
         assert not t1._allow_done_after_reset
 
 
-def test_nested_transformed_env():
-    base_env = ContinuousActionVecMockEnv()
-    t1 = RewardScaling(0, 1)
-    t2 = RewardScaling(0, 2)
-    env = TransformedEnv(TransformedEnv(base_env, t1), t2)
-
-    assert env.base_env is base_env
-    assert isinstance(env.transform, Compose)
-    children = list(env.transform.transforms.children())
-    assert len(children) == 2
-    assert children[0].scale == 1
-    assert children[1].scale == 2
-
-
 def test_transform_parent():
     base_env = ContinuousActionVecMockEnv()
     t1 = RewardScaling(0, 1)
diff --git a/torchrl/__init__.py b/torchrl/__init__.py
@@ -52,7 +52,13 @@
 import torchrl.modules
 import torchrl.objectives
 import torchrl.trainers
-from torchrl._utils import compile_with_warmup, timeit
+from torchrl._utils import (
+    auto_unwrap_transformed_env,
+    compile_with_warmup,
+    implement_for,
+    set_auto_unwrap_transformed_env,
+    timeit,
+)
 
 # Filter warnings in subprocesses: True by default given the multiple optional
 # deps of the library. This can be turned on via `torchrl.filter_warnings_subprocess = False`.
diff --git a/torchrl/_utils.py b/torchrl/_utils.py
@@ -984,3 +984,86 @@ def count_and_compile(*model_args, **model_kwargs):
             return compiled_model(*model_args, **model_kwargs)
 
         return count_and_compile
+
+
+# auto unwrap control
+_DEFAULT_AUTO_UNWRAP = True
+_AUTO_UNWRAP = os.environ.get("AUTO_UNWRAP_TRANSFORMED_ENV")
+
+
+class set_auto_unwrap_transformed_env(_DecoratorContextManager):
+    """A context manager or decorator to control whether TransformedEnv should automatically unwrap nested TransformedEnv instances.
+
+    Args:
+        mode (bool): Whether to automatically unwrap nested :class:`~torchrl.envs.TransformedEnv`
+            instances. If ``False``, :class:`~torchrl.envs.TransformedEnv` will not unwrap nested instances.
+            This argument is required: it has no default value.
+
+    .. note:: Until v0.9, a warning is raised if :class:`~torchrl.envs.TransformedEnv` instances are nested
+        and the value is not set explicitly (in which case ``auto_unwrap=True`` is used by default).
+        You can set the value returned by :func:`~torchrl.auto_unwrap_transformed_env`
+        through:
+
+        - The ``AUTO_UNWRAP_TRANSFORMED_ENV`` environment variable;
+        - By setting ``torchrl.set_auto_unwrap_transformed_env(val: bool).set()`` at the
+          beginning of your script;
+        - By using ``torchrl.set_auto_unwrap_transformed_env(val: bool)`` as a context
+          manager or a decorator.
+
+    .. seealso:: :class:`~torchrl.envs.TransformedEnv`
+
+    Examples:
+        >>> with set_auto_unwrap_transformed_env(False):
+        ...     env = TransformedEnv(TransformedEnv(base_env))
+        ...     assert isinstance(env.base_env, TransformedEnv)
+        >>> @set_auto_unwrap_transformed_env(False)
+        ... def my_function():
+        ...     env = TransformedEnv(TransformedEnv(base_env))
+        ...     assert isinstance(env.base_env, TransformedEnv)
+        ...     return env
+
+    """
+
+    def __init__(self, mode: bool) -> None:
+        super().__init__()
+        self.mode = mode
+
+    def clone(self) -> set_auto_unwrap_transformed_env:
+        # override this method if your children class takes __init__ parameters
+        return type(self)(self.mode)
+
+    def __enter__(self) -> None:
+        self.set()
+
+    def set(self) -> None:
+        global _AUTO_UNWRAP
+        self._old_mode = _AUTO_UNWRAP
+        _AUTO_UNWRAP = bool(self.mode)
+        # mirror the value in the environment so that sub-processes see the same auto-unwrap setting as the main one
+        os.environ["AUTO_UNWRAP_TRANSFORMED_ENV"] = str(_AUTO_UNWRAP)
+
+    def __exit__(self, exc_type: Any, exc_value: Any, traceback: Any) -> None:
+        global _AUTO_UNWRAP
+        _AUTO_UNWRAP = self._old_mode
+        os.environ["AUTO_UNWRAP_TRANSFORMED_ENV"] = str(_AUTO_UNWRAP)
+
+
+def auto_unwrap_transformed_env(allow_none=False):
+    """Get the current setting for automatically unwrapping TransformedEnv instances.
+
+    Args:
+        allow_none (bool, optional): If True, returns ``None`` if no setting has been
+            specified. Otherwise, returns the default setting. Defaults to ``False``.
+
+    .. seealso:: :class:`~torchrl.set_auto_unwrap_transformed_env`
+
+    Returns:
+        bool or None: The current setting for automatically unwrapping TransformedEnv
+            instances.
+    """
+    global _AUTO_UNWRAP
+    if _AUTO_UNWRAP is None and allow_none:
+        return None
+    elif _AUTO_UNWRAP is None:
+        return _DEFAULT_AUTO_UNWRAP
+    return strtobool(_AUTO_UNWRAP) if isinstance(_AUTO_UNWRAP, str) else _AUTO_UNWRAP
diff --git a/torchrl/envs/transforms/transforms.py b/torchrl/envs/transforms/transforms.py
@@ -61,6 +61,7 @@
     _ends_with,
     _make_ordinal_device,
     _replace_last,
+    auto_unwrap_transformed_env,
     logger as torchrl_logger,
 )
 
@@ -705,7 +706,11 @@ class TransformedEnv(EnvBase, metaclass=_TEnvPostInit):
     Keyword Args:
         auto_unwrap (bool, optional): if ``True``, wrapping a transformed env in  transformed env
             unwraps the transforms of the inner TransformedEnv in the outer one (the new instance).
-            Defaults to ``True``
+            Defaults to ``True``.
+
+            .. note:: This behavior will switch to ``False`` in v0.9.
+
+            .. seealso:: :class:`~torchrl.set_auto_unwrap_transformed_env`
 
     Examples:
         >>> env = GymEnv("Pendulum-v0")
@@ -724,7 +729,7 @@ def __init__(
         transform: Optional[Transform] = None,
         cache_specs: bool = True,
         *,
-        auto_unwrap: bool = True,
+        auto_unwrap: bool | None = None,
         **kwargs,
     ):
         self._transform = None
@@ -737,7 +742,24 @@ def __init__(
 
         # Type matching must be exact here, because subtyping could introduce differences in behavior that must
         # be contained within the subclass.
-        if type(env) is TransformedEnv and type(self) is TransformedEnv and auto_unwrap:
+        if type(env) is TransformedEnv and type(self) is TransformedEnv:
+            if auto_unwrap is None:
+                auto_unwrap = auto_unwrap_transformed_env(allow_none=True)
+                if auto_unwrap is None:
+                    warnings.warn(
+                        "The default behavior of TransformedEnv will change in version 0.9. "
+                        "Nested TransformedEnvs will no longer be automatically unwrapped by default. "
+                        "To prepare for this change, use set_auto_unwrap_transformed_env(val: bool) "
+                        "as a decorator or context manager, or set the environment variable "
+                        "AUTO_UNWRAP_TRANSFORMED_ENV to 'False'.",
+                        FutureWarning,
+                        stacklevel=2,
+                    )
+                    auto_unwrap = True
+        else:
+            auto_unwrap = False
+
+        if auto_unwrap:
             self._set_env(env.base_env, device)
             if type(transform) is not Compose:
                 # we don't use isinstance as some transforms may be subclassed from
@@ -768,6 +790,7 @@ def __init__(
             self._set_env(env, device)
             if transform is None:
                 transform = Compose()
+
         self.transform = transform
 
         self._last_obs = None