pytorch
diff --git a/.github/unittest/linux/scripts/run_all.sh b/.github/unittest/linux/scripts/run_all.sh
Lines changed: 2 additions & 2 deletions
diff --git a/.github/unittest/linux_distributed/scripts/setup_env.sh b/.github/unittest/linux_distributed/scripts/setup_env.sh
Lines changed: 2 additions & 26 deletions
diff --git a/.github/unittest/linux_libs/scripts_gym/batch_scripts.sh b/.github/unittest/linux_libs/scripts_gym/batch_scripts.sh
Lines changed: 3 additions & 3 deletions
diff --git a/.github/unittest/linux_libs/scripts_llm/run_test.sh b/.github/unittest/linux_libs/scripts_llm/run_test.sh
Lines changed: 6 additions & 3 deletions
diff --git a/.github/unittest/linux_sota/scripts/run_all.sh b/.github/unittest/linux_sota/scripts/run_all.sh
Lines changed: 1 addition & 1 deletion
diff --git a/sota-check/README.md b/sota-check/README.md
Lines changed: 1 addition & 1 deletion
diff --git a/test/_utils_internal.py b/test/_utils_internal.py
Lines changed: 1 addition & 1 deletion
diff --git a/test/test_env.py b/test/test_env.py
Lines changed: 3 additions & 1 deletion
diff --git a/test/test_libs.py b/test/test_libs.py
Lines changed: 69 additions & 34 deletions
diff --git a/torchrl/_utils.py b/torchrl/_utils.py
Lines changed: 10 additions & 3 deletions
@@ -97,9 +97,9 @@ echo "installing gymnasium"
 if [[ "$PYTHON_VERSION" == "3.12" ]]; then
   pip3 install ale-py
   pip3 install sympy
-  pip3 install "gymnasium[accept-rom-license,mujoco]>=1.1" mo-gymnasium[mujoco]
+  pip3 install "gymnasium[mujoco]>=1.1" mo-gymnasium[mujoco]
 else
-  pip3 install "gymnasium[atari,accept-rom-license,mujoco]>=1.1" mo-gymnasium[mujoco]
+  pip3 install "gymnasium[atari,mujoco]>=1.1" mo-gymnasium[mujoco]
 fi
 pip3 install "mujoco" -U
 
 
@@ -96,32 +96,8 @@ if [[ $OSTYPE != 'darwin'* ]]; then
   # install ale-py: manylinux names are broken for CentOS so we need to manually download and
   # rename them
   PY_VERSION=$(python --version)
-  echo "installing ale-py for ${PY_PY_VERSION}"
-  if [[ $PY_VERSION == *"3.7"* ]]; then
-    wget https://files.pythonhosted.org/packages/ab/fd/6615982d9460df7f476cad265af1378057eee9daaa8e0026de4cedbaffbd/ale_py-0.8.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
-    pip install ale_py-0.8.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
-    rm ale_py-0.8.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
-  elif [[ $PY_VERSION == *"3.8"* ]]; then
-    wget https://files.pythonhosted.org/packages/0f/8a/feed20571a697588bc4bfef05d6a487429c84f31406a52f8af295a0346a2/ale_py-0.8.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
-    pip install ale_py-0.8.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
-    rm ale_py-0.8.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
-  elif [[ $PY_VERSION == *"3.9"* ]]; then
-    wget https://files.pythonhosted.org/packages/a0/98/4316c1cedd9934f9a91b6e27a9be126043b4445594b40cfa391c8de2e5e8/ale_py-0.8.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
-    pip install ale_py-0.8.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
-    rm ale_py-0.8.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
-  elif [[ $PY_VERSION == *"3.10"* ]]; then
-    wget https://files.pythonhosted.org/packages/60/1b/3adde7f44f79fcc50d0a00a0643255e48024c4c3977359747d149dc43500/ale_py-0.8.0-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl
-    mv ale_py-0.8.0-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl ale_py-0.8.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
-    pip install ale_py-0.8.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
-    rm ale_py-0.8.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
-  elif [[ $PY_VERSION == *"3.11"* ]]; then
-    wget https://files.pythonhosted.org/packages/60/1b/3adde7f44f79fcc50d0a00a0643255e48024c4c3977359747d149dc43500/ale_py-0.8.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl
-    mv ale_py-0.8.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl ale_py-0.8.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
-    pip install ale_py-0.8.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
-    rm ale_py-0.8.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
-  fi
   echo "installing gymnasium"
-  pip install "gymnasium[atari,accept-rom-license]>=1.1"
+  pip install "gymnasium[atari]>=1.1"
 else
-  pip install "gymnasium[atari,accept-rom-license]>=1.1"
+  pip install "gymnasium[atari]>=1.1"
 fi
@@ -126,7 +126,7 @@ do
   conda activate ./cloned_env
 
   echo "Testing gym version: ${GYM_VERSION}"
-  pip3 install 'gymnasium[atari,accept-rom-license,ale-py]'==$GYM_VERSION
+  pip3 install 'gymnasium[atari,ale-py]'==$GYM_VERSION
 
   $DIR/run_test.sh
 
@@ -140,7 +140,7 @@ conda deactivate
 conda create --prefix ./cloned_env --clone ./env -y
 conda activate ./cloned_env
 
-pip3 install 'gymnasium[accept-rom-license,ale-py,atari]>=1.1.0' mo-gymnasium gymnasium-robotics -U
+pip3 install 'gymnasium[ale-py,atari]>=1.1.0' mo-gymnasium gymnasium-robotics -U
 
 $DIR/run_test.sh
 
@@ -155,7 +155,7 @@ conda deactivate
 conda create --prefix ./cloned_env --clone ./env -y
 conda activate ./cloned_env
 
-pip3 install 'gymnasium[accept-rom-license,ale-py,atari]>=1.1.0' mo-gymnasium gymnasium-robotics -U
+pip3 install 'gymnasium[ale-py,atari]>=1.1.0' mo-gymnasium gymnasium-robotics -U
 
 $DIR/run_test.sh
 
 
@@ -10,6 +10,9 @@ ln -s /usr/bin/swig3.0 /usr/bin/swig
 
 export PYTORCH_TEST_WITH_SLOW='1'
 export LAZY_LEGACY_OP=False
+
+# Make vLLM start its workers with 'spawn'; this avoids "RuntimeError: Cannot re-initialize CUDA in forked subprocess. To use CUDA with multiprocessing, you must use the 'spawn' start method"
+export VLLM_WORKER_MULTIPROC_METHOD=spawn
 python -m torch.utils.collect_env
 # Avoid error: "fatal: unsafe repository"
 git config --global --add safe.directory '*'
@@ -22,11 +25,11 @@ conda deactivate && conda activate ./env
 
 python -c "import transformers, datasets"
 
-python .github/unittest/helpers/coverage_run_parallel.py -m pytest test/test_rlhf.py --instafail -v --durations 200 --capture no --error-for-skips
+pytest test/test_rlhf.py --instafail -v --durations 200 --capture no --error-for-skips
 
-python .github/unittest/helpers/coverage_run_parallel.py -m pytest test/test_actors.py -k llm --instafail -v --durations 200 --capture no --error-for-skips --runslow
+pytest test/test_actors.py test/test_collector.py -k llm --instafail -v --durations 200 --capture no --error-for-skips --runslow
 
-python .github/unittest/helpers/coverage_run_parallel.py examples/rlhf/train_rlhf.py \
+python examples/rlhf/train_rlhf.py \
   sys.device=cuda:0 sys.ref_device=cuda:0 \
   model.name_or_path=gpt2 train.max_epochs=2 \
   data.batch_size=2 train.ppo.ppo_batch_size=2 \
 
@@ -112,7 +112,7 @@ python -c """import gym;import d4rl"""
 
 # install ale-py: manylinux names are broken for CentOS so we need to manually download and
 # rename them
-pip install "gymnasium[atari,accept-rom-license]>=1.1.0"
+pip install "gymnasium[atari]>=1.1.0"
 
 # ============================================================================================ #
 # ================================ PyTorch & TorchRL ========================================= #
 
@@ -26,7 +26,7 @@ export MUJOCO_GL=egl
 conda create -n rl-sota-bench python=3.10 -y 
 conda install anaconda::libglu -y
 pip3 install --pre torch torchvision --index-url https://download.pytorch.org/whl/nightly/cu121
-pip3 install "gymnasium[accept-rom-license,atari,mujoco]" vmas tqdm wandb pygame "moviepy<2.0.0" imageio submitit hydra-core transformers
+pip3 install "gymnasium[atari,mujoco]" vmas tqdm wandb pygame "moviepy<2.0.0" imageio submitit hydra-core transformers
 
 cd /path/to/tensordict
 python setup.py develop
 
@@ -156,7 +156,7 @@ def _set_gym_environments():  # noqa: F811
     global _CARTPOLE_VERSIONED, _HALFCHEETAH_VERSIONED, _PENDULUM_VERSIONED, _PONG_VERSIONED, _BREAKOUT_VERSIONED
 
     _CARTPOLE_VERSIONED = "CartPole-v1"
-    _HALFCHEETAH_VERSIONED = "HalfCheetah-v4"
+    _HALFCHEETAH_VERSIONED = "HalfCheetah-v5"
     _PENDULUM_VERSIONED = "Pendulum-v1"
     _PONG_VERSIONED = "ALE/Pong-v5"
     _BREAKOUT_VERSIONED = "ALE/Breakout-v5"
 
@@ -56,6 +56,7 @@
     ParallelEnv,
     PendulumEnv,
     SerialEnv,
+    set_gym_backend,
     TicTacToeEnv,
 )
 from torchrl.envs.batched_envs import _stackable
@@ -2511,6 +2512,7 @@ def test_info_dict_reader(self, device, seed=0):
             import gymnasium as gym
         except ModuleNotFoundError:
             import gym
+        set_gym_backend(gym).set()
 
         env = GymWrapper(gym.make(HALFCHEETAH_VERSIONED()), device=device)
         env.set_info_dict_reader(
@@ -2542,7 +2544,7 @@ def test_info_dict_reader(self, device, seed=0):
             ),
             [Unbounded((), dtype=torch.float64)],
         ):
-            env2 = GymWrapper(gym.make("HalfCheetah-v4"))
+            env2 = GymWrapper(gym.make("HalfCheetah-v5"))
             env2.set_info_dict_reader(
                 default_info_dict_reader(["x_position"], spec=spec)
             )
 
@@ -190,6 +190,7 @@ def maybe_init_minigrid():
         minigrid.register_minigrid_envs()
 
 
+@implement_for("gym")
 def get_gym_pixel_wrapper():
     try:
         # works whenever gym_version > version.parse("0.19")
@@ -203,6 +204,29 @@ def get_gym_pixel_wrapper():
     return PixelObservationWrapper
 
 
+@implement_for("gymnasium", None, "1.1.0")
+def get_gym_pixel_wrapper():  # noqa: F811
+    try:
+        # prefer the PixelObservationWrapper exposed by the backend's wrappers.pixel_observation module
+        PixelObservationWrapper = gym_backend(
+            "wrappers.pixel_observation"
+        ).PixelObservationWrapper
+    except Exception:  # any failure above: fall back to the wrapper shipped in torchrl.envs.libs.utils
+        from torchrl.envs.libs.utils import (
+            GymPixelObservationWrapper as PixelObservationWrapper,
+        )
+    return PixelObservationWrapper
+
+
+@implement_for("gymnasium", "1.1.0")
+def get_gym_pixel_wrapper():  # noqa: F811
+    # adapter around the backend's wrappers.AddRenderObservation: forwards pixels_only as render_only
+    PixelObservationWrapper = lambda *args, pixels_only=False, **kwargs: gym_backend(
+        "wrappers"
+    ).AddRenderObservation(*args, render_only=pixels_only, **kwargs)
+    return PixelObservationWrapper
+
+
 if _has_gym:
     try:
         from gymnasium import __version__ as gym_version
@@ -1030,7 +1054,12 @@ def test_one_hot_and_categorical(self):  # noqa: F811
     )
     @pytest.mark.flaky(reruns=5, reruns_delay=1)
     def test_vecenvs_wrapper(self, envname):
-        self._test_vecenvs_wrapper(envname)
+        import gymnasium
+
+        with set_gym_backend("gymnasium"):
+            self._test_vecenvs_wrapper(
+                envname, kwargs={"reset_mode": gymnasium.vector.AutoresetMode.SAME_STEP}
+            )
 
     @implement_for("gymnasium", None, "1.0.0")
     @pytest.mark.parametrize(
@@ -1040,22 +1069,25 @@ def test_vecenvs_wrapper(self, envname):
     )
     @pytest.mark.flaky(reruns=5, reruns_delay=1)
     def test_vecenvs_wrapper(self, envname):  # noqa
-        self._test_vecenvs_wrapper(envname)
+        with set_gym_backend("gymnasium"):
+            self._test_vecenvs_wrapper(envname)
 
-    def _test_vecenvs_wrapper(self, envname):
+    def _test_vecenvs_wrapper(self, envname, kwargs=None):
         import gymnasium
 
+        if kwargs is None:
+            kwargs = {}
         # we can't use parametrize with implement_for
         env = GymWrapper(
             gymnasium.vector.SyncVectorEnv(
-                2 * [lambda envname=envname: gymnasium.make(envname)]
+                2 * [lambda envname=envname: gymnasium.make(envname)], **kwargs
             )
         )
         assert env.batch_size == torch.Size([2])
         check_env_specs(env)
         env = GymWrapper(
             gymnasium.vector.AsyncVectorEnv(
-                2 * [lambda envname=envname: gymnasium.make(envname)]
+                2 * [lambda envname=envname: gymnasium.make(envname)], **kwargs
             )
         )
         assert env.batch_size == torch.Size([2])
@@ -1113,25 +1145,26 @@ def _test_vecenvs_env(self, envname):
     )
     @pytest.mark.flaky(reruns=5, reruns_delay=1)
     def test_vecenvs_wrapper(self, envname):  # noqa: F811
-        gym = gym_backend()
-        # we can't use parametrize with implement_for
-        for envname in ["CartPole-v1", "HalfCheetah-v4"]:
-            env = GymWrapper(
-                gym.vector.SyncVectorEnv(
-                    2 * [lambda envname=envname: gym.make(envname)]
+        with set_gym_backend("gym"):
+            gym = gym_backend()
+            # we can't use parametrize with implement_for
+            for envname in ["CartPole-v1", "HalfCheetah-v4"]:
+                env = GymWrapper(
+                    gym.vector.SyncVectorEnv(
+                        2 * [lambda envname=envname: gym.make(envname)]
+                    )
                 )
-            )
-            assert env.batch_size == torch.Size([2])
-            check_env_specs(env)
-            env = GymWrapper(
-                gym.vector.AsyncVectorEnv(
-                    2 * [lambda envname=envname: gym.make(envname)]
+                assert env.batch_size == torch.Size([2])
+                check_env_specs(env)
+                env = GymWrapper(
+                    gym.vector.AsyncVectorEnv(
+                        2 * [lambda envname=envname: gym.make(envname)]
+                    )
                 )
-            )
-            assert env.batch_size == torch.Size([2])
-            check_env_specs(env)
-            env.close()
-            del env
+                assert env.batch_size == torch.Size([2])
+                check_env_specs(env)
+                env.close()
+                del env
 
     @implement_for("gym", "0.18")
     @pytest.mark.parametrize(
@@ -1150,17 +1183,17 @@ def test_vecenvs_env(self, envname):  # noqa: F811
                 env = GymEnv(envname, num_envs=2, from_pixels=False)
                 env.set_seed(0)
                 assert env.get_library_name(env._env) == "gym"
-            # rollouts can be executed without decorator
-            check_env_specs(env)
-            rollout = env.rollout(100, break_when_any_done=False)
-            for obs_key in env.observation_spec.keys(True, True):
-                rollout_consistency_assertion(
-                    rollout,
-                    done_key="done",
-                    observation_key=obs_key,
-                    done_strict="CartPole" in envname,
-                )
-            env.close()
+                # rollouts can be executed without decorator
+                check_env_specs(env)
+                rollout = env.rollout(100, break_when_any_done=False)
+                for obs_key in env.observation_spec.keys(True, True):
+                    rollout_consistency_assertion(
+                        rollout,
+                        done_key="done",
+                        observation_key=obs_key,
+                        done_strict="CartPole" in envname,
+                    )
+                env.close()
             del env
             if envname != "CartPole-v1":
                 with set_gym_backend("gym"):
@@ -1469,7 +1502,7 @@ def reset(
                     {},
                 )
 
-        yield CountingEnvRandomReset
+        return CountingEnvRandomReset
 
     @implement_for("gym")
     def test_gymnasium_autoreset(self, venv):
@@ -1484,6 +1517,8 @@ def test_gymnasium_autoreset(self, venv):  # noqa
     def test_gymnasium_autoreset(self, venv):  # noqa
         import gymnasium as gym
 
+        set_gym_backend("gymnasium").set()
+
         counting_env = self.counting_env()
         if venv == "sync":
             venv = gym.vector.SyncVectorEnv
 
@@ -423,10 +423,17 @@ def module_set(self):
         else:
             # class not yet defined
             return
+        try:
+            delattr(cls, self.fn.__name__)
+        except AttributeError:
+            pass
+
+        name = self.fn.__name__
         if self.class_method:
-            setattr(cls, self.fn.__name__, classmethod(self.fn))
+            fn = classmethod(self.fn)
         else:
-            setattr(cls, self.fn.__name__, self.fn)
+            fn = self.fn
+        setattr(cls, name, fn)
 
     @classmethod
     def import_module(cls, module_name: Callable | str) -> str:
@@ -543,7 +550,7 @@ def __repr__(self):
         return (
             f"{self.__class__.__name__}("
             f"module_name={self.module_name}({self.from_version, self.to_version}), "
-            f"fn_name={self.fn.__name__}, cls={self._get_cls(self.fn)}, is_set={self.do_set})"
+            f"fn_name={self.fn.__name__}, cls={self._get_cls(self.fn)})"
         )