Commit 98a8b30

Updates to latest RSL-RL v2.3.0 release (#2154)
# Description

This PR introduces multi-GPU training for the RSL-RL library and adds configuration options for symmetry and Random Network Distillation (RND). It is compatible only with RSL-RL v2.3.0 onwards, so the dependency is now pinned to that version. Fixes #2180

## Type of change

- New feature (non-breaking change which adds functionality)

## Checklist

- [x] I have run the [`pre-commit` checks](https://pre-commit.com/) with `./isaaclab.sh --format`
- [x] I have made corresponding changes to the documentation
- [x] My changes generate no new warnings
- [ ] I have added tests that prove my fix is effective or that my feature works
- [x] I have updated the changelog and the corresponding version in the extension's `config/extension.toml` file
- [x] I have added my name to the `CONTRIBUTORS.md` or my name already exists there
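In short, when `--distributed` is passed, each process is pinned to the GPU matching its local rank and the seeds are offset per rank so workers collect decorrelated rollouts. A condensed sketch of that logic follows (the full version is in the `train.py` and `benchmark_rsl_rl.py` diffs below; the helper function itself is hypothetical, the assignments mirror the diff):

```python
# Hypothetical helper condensing the multi-GPU handling added in this commit.
# `env_cfg`/`agent_cfg` are the Isaac Lab environment and RSL-RL runner configs,
# and `local_rank` comes from Isaac Lab's AppLauncher (app_launcher.local_rank).
def apply_distributed_overrides(env_cfg, agent_cfg, local_rank: int) -> None:
    env_cfg.sim.device = f"cuda:{local_rank}"  # simulate on this rank's GPU
    agent_cfg.device = f"cuda:{local_rank}"    # keep learner tensors on the same GPU
    # offset the seed per rank so parallel workers explore differently
    seed = agent_cfg.seed + local_rank
    env_cfg.seed = seed
    agent_cfg.seed = seed
```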
1 parent b663ad1 commit 98a8b30

13 files changed: 303 additions, 10 deletions


docs/source/_static/refs.bib

Lines changed: 21 additions & 0 deletions
@@ -154,3 +154,24 @@ @inproceedings{he2016deep
   pages={770--778},
   year={2016}
 }
+
+@InProceedings{schwarke2023curiosity,
+  title = {Curiosity-Driven Learning of Joint Locomotion and Manipulation Tasks},
+  author = {Schwarke, Clemens and Klemm, Victor and Boon, Matthijs van der and Bjelonic, Marko and Hutter, Marco},
+  booktitle = {Proceedings of The 7th Conference on Robot Learning},
+  pages = {2594--2610},
+  year = {2023},
+  volume = {229},
+  series = {Proceedings of Machine Learning Research},
+  publisher = {PMLR},
+  url = {https://proceedings.mlr.press/v229/schwarke23a.html},
+}
+
+@InProceedings{mittal2024symmetry,
+  author={Mittal, Mayank and Rudin, Nikita and Klemm, Victor and Allshire, Arthur and Hutter, Marco},
+  booktitle={2024 IEEE International Conference on Robotics and Automation (ICRA)},
+  title={Symmetry Considerations for Learning Task Symmetric Robot Policies},
+  year={2024},
+  pages={7433-7439},
+  doi={10.1109/ICRA57147.2024.10611493}
+}

docs/source/features/multi_gpu.rst

Lines changed: 22 additions & 1 deletion
@@ -4,7 +4,7 @@ Multi-GPU and Multi-Node Training
 .. currentmodule:: isaaclab

 Isaac Lab supports multi-GPU and multi-node reinforcement learning. Currently, this feature is only
-available for RL-Games and skrl libraries workflows. We are working on extending this feature to
+available for RL-Games, RSL-RL and skrl libraries workflows. We are working on extending this feature to
 other workflows.

 .. attention::
@@ -57,6 +57,13 @@ To train with multiple GPUs, use the following command, where ``--nproc_per_node

         python -m torch.distributed.run --nnodes=1 --nproc_per_node=2 scripts/reinforcement_learning/rl_games/train.py --task=Isaac-Cartpole-v0 --headless --distributed

+    .. tab-item:: rsl_rl
+        :sync: rsl_rl
+
+        .. code-block:: shell
+
+            python -m torch.distributed.run --nnodes=1 --nproc_per_node=2 scripts/reinforcement_learning/rsl_rl/train.py --task=Isaac-Cartpole-v0 --headless --distributed
+
     .. tab-item:: skrl
         :sync: skrl

@@ -95,6 +102,13 @@ For the master node, use the following command, where ``--nproc_per_node`` repre

         python -m torch.distributed.run --nproc_per_node=2 --nnodes=2 --node_rank=0 --rdzv_id=123 --rdzv_backend=c10d --rdzv_endpoint=localhost:5555 scripts/reinforcement_learning/rl_games/train.py --task=Isaac-Cartpole-v0 --headless --distributed

+    .. tab-item:: rsl_rl
+        :sync: rsl_rl
+
+        .. code-block:: shell
+
+            python -m torch.distributed.run --nproc_per_node=2 --nnodes=2 --node_rank=0 --rdzv_id=123 --rdzv_backend=c10d --rdzv_endpoint=localhost:5555 scripts/reinforcement_learning/rsl_rl/train.py --task=Isaac-Cartpole-v0 --headless --distributed
+
     .. tab-item:: skrl
         :sync: skrl

@@ -128,6 +142,13 @@ For non-master nodes, use the following command, replacing ``--node_rank`` with

         python -m torch.distributed.run --nproc_per_node=2 --nnodes=2 --node_rank=1 --rdzv_id=123 --rdzv_backend=c10d --rdzv_endpoint=ip_of_master_machine:5555 scripts/reinforcement_learning/rl_games/train.py --task=Isaac-Cartpole-v0 --headless --distributed

+    .. tab-item:: rsl_rl
+        :sync: rsl_rl
+
+        .. code-block:: shell
+
+            python -m torch.distributed.run --nproc_per_node=2 --nnodes=2 --node_rank=1 --rdzv_id=123 --rdzv_backend=c10d --rdzv_endpoint=ip_of_master_machine:5555 scripts/reinforcement_learning/rsl_rl/train.py --task=Isaac-Cartpole-v0 --headless --distributed
+
     .. tab-item:: skrl
         :sync: skrl

docs/source/overview/reinforcement-learning/rl_frameworks.rst

Lines changed: 2 additions & 2 deletions
@@ -27,7 +27,7 @@ Feature Comparison
     - Stable Baselines3
   * - Algorithms Included
     - PPO, SAC, A2C
-    - PPO
+    - PPO, Distillation
     - `Extensive List <https://skrl.readthedocs.io/en/latest/#agents>`__
     - `Extensive List <https://github.com/DLR-RM/stable-baselines3?tab=readme-ov-file#implemented-algorithms>`__
   * - Vectorized Training
@@ -37,7 +37,7 @@ Feature Comparison
     - No
   * - Distributed Training
     - Yes
-    - No
+    - Yes
     - Yes
     - No
   * - ML Frameworks Supported

scripts/benchmarks/benchmark_rsl_rl.py

Lines changed: 22 additions & 0 deletions
@@ -31,6 +31,9 @@
 parser.add_argument("--task", type=str, default=None, help="Name of the task.")
 parser.add_argument("--seed", type=int, default=42, help="Seed used for the environment")
 parser.add_argument("--max_iterations", type=int, default=10, help="RL Policy training iterations.")
+parser.add_argument(
+    "--distributed", action="store_true", default=False, help="Run training with multiple GPUs or nodes."
+)
 parser.add_argument(
     "--benchmark_backend",
     type=str,
@@ -126,8 +129,27 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen
     """Train with RSL-RL agent."""
     # parse configuration
     benchmark.set_phase("loading", start_recording_frametime=False, start_recording_runtime=True)
+    # override configurations with non-hydra CLI arguments
     agent_cfg = cli_args.update_rsl_rl_cfg(agent_cfg, args_cli)
     env_cfg.scene.num_envs = args_cli.num_envs if args_cli.num_envs is not None else env_cfg.scene.num_envs
+    agent_cfg.max_iterations = (
+        args_cli.max_iterations if args_cli.max_iterations is not None else agent_cfg.max_iterations
+    )
+
+    # set the environment seed
+    # note: certain randomizations occur in the environment initialization so we set the seed here
+    env_cfg.seed = agent_cfg.seed
+    env_cfg.sim.device = args_cli.device if args_cli.device is not None else env_cfg.sim.device
+
+    # multi-gpu training configuration
+    if args_cli.distributed:
+        env_cfg.sim.device = f"cuda:{app_launcher.local_rank}"
+        agent_cfg.device = f"cuda:{app_launcher.local_rank}"
+
+        # set seed to have diversity in different threads
+        seed = agent_cfg.seed + app_launcher.local_rank
+        env_cfg.seed = seed
+        agent_cfg.seed = seed

     # specify directory for logging experiments
     log_root_path = os.path.join("logs", "rsl_rl", agent_cfg.experiment_name)

scripts/reinforcement_learning/rsl_rl/play.py

Lines changed: 17 additions & 4 deletions
@@ -5,6 +5,21 @@

 """Script to play a checkpoint if an RL agent from RSL-RL."""

+import platform
+from importlib.metadata import version
+
+if version("rsl-rl-lib") != "2.3.0":
+    if platform.system() == "Windows":
+        cmd = [r".\isaaclab.bat", "-p", "-m", "pip", "install", "rsl-rl-lib==2.3.0"]
+    else:
+        cmd = ["./isaaclab.sh", "-p", "-m", "pip", "install", "rsl-rl-lib==2.3.0"]
+    print(
+        f"Please install the correct version of RSL-RL.\nExisting version is: '{version('rsl-rl-lib')}'"
+        " and required version is: '2.3.0'.\nTo install the correct version, run:"
+        f"\n\n\t{' '.join(cmd)}\n"
+    )
+    exit(1)
+
 """Launch Isaac Sim Simulator first."""

 import argparse
@@ -120,11 +135,9 @@ def main():

     # export policy to onnx/jit
     export_model_dir = os.path.join(os.path.dirname(resume_path), "exported")
-    export_policy_as_jit(
-        ppo_runner.alg.actor_critic, ppo_runner.obs_normalizer, path=export_model_dir, filename="policy.pt"
-    )
+    export_policy_as_jit(ppo_runner.alg.policy, ppo_runner.obs_normalizer, path=export_model_dir, filename="policy.pt")
     export_policy_as_onnx(
-        ppo_runner.alg.actor_critic, normalizer=ppo_runner.obs_normalizer, path=export_model_dir, filename="policy.onnx"
+        ppo_runner.alg.policy, normalizer=ppo_runner.obs_normalizer, path=export_model_dir, filename="policy.onnx"
     )

     dt = env.unwrapped.physics_dt

scripts/reinforcement_learning/rsl_rl/train.py

Lines changed: 28 additions & 0 deletions
@@ -5,6 +5,21 @@

 """Script to train RL agent with RSL-RL."""

+import platform
+from importlib.metadata import version
+
+if version("rsl-rl-lib") != "2.3.0":
+    if platform.system() == "Windows":
+        cmd = [r".\isaaclab.bat", "-p", "-m", "pip", "install", "rsl-rl-lib==2.3.0"]
+    else:
+        cmd = ["./isaaclab.sh", "-p", "-m", "pip", "install", "rsl-rl-lib==2.3.0"]
+    print(
+        f"Please install the correct version of RSL-RL.\nExisting version is: '{version('rsl-rl-lib')}'"
+        " and required version is: '2.3.0'.\nTo install the correct version, run:"
+        f"\n\n\t{' '.join(cmd)}\n"
+    )
+    exit(1)
+
 """Launch Isaac Sim Simulator first."""

 import argparse
@@ -25,6 +40,9 @@
 parser.add_argument("--task", type=str, default=None, help="Name of the task.")
 parser.add_argument("--seed", type=int, default=None, help="Seed used for the environment")
 parser.add_argument("--max_iterations", type=int, default=None, help="RL Policy training iterations.")
+parser.add_argument(
+    "--distributed", action="store_true", default=False, help="Run training with multiple GPUs or nodes."
+)
 # append RSL-RL cli arguments
 cli_args.add_rsl_rl_args(parser)
 # append AppLauncher cli args
@@ -90,6 +108,16 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen
     env_cfg.seed = agent_cfg.seed
     env_cfg.sim.device = args_cli.device if args_cli.device is not None else env_cfg.sim.device

+    # multi-gpu training configuration
+    if args_cli.distributed:
+        env_cfg.sim.device = f"cuda:{app_launcher.local_rank}"
+        agent_cfg.device = f"cuda:{app_launcher.local_rank}"
+
+        # set seed to have diversity in different threads
+        seed = agent_cfg.seed + app_launcher.local_rank
+        env_cfg.seed = seed
+        agent_cfg.seed = seed
+
     # specify directory for logging experiments
     log_root_path = os.path.join("logs", "rsl_rl", agent_cfg.experiment_name)
     log_root_path = os.path.abspath(log_root_path)

source/isaaclab_rl/config/extension.toml

Lines changed: 1 addition & 1 deletion
@@ -1,7 +1,7 @@
 [package]

 # Note: Semantic Versioning is used: https://semver.org/
-version = "0.1.1"
+version = "0.1.2"

 # Description
 title = "Isaac Lab RL"

source/isaaclab_rl/docs/CHANGELOG.rst

Lines changed: 9 additions & 0 deletions
@@ -1,6 +1,15 @@
 Changelog
 ---------

+0.1.2 (2025-03-28)
+~~~~~~~~~~~~~~~~~~
+
+Added
+^^^^^
+
+* Added symmetry and curiosity-based exploration configurations for RSL-RL wrapper.
+
+
 0.1.1 (2025-03-10)
 ~~~~~~~~~~~~~~~~~~

source/isaaclab_rl/isaaclab_rl/rsl_rl/__init__.py

Lines changed: 2 additions & 0 deletions
@@ -17,4 +17,6 @@

 from .exporter import export_policy_as_jit, export_policy_as_onnx
 from .rl_cfg import RslRlOnPolicyRunnerCfg, RslRlPpoActorCriticCfg, RslRlPpoAlgorithmCfg
+from .rnd_cfg import RslRlRndCfg
+from .symmetry_cfg import RslRlSymmetryCfg
 from .vecenv_wrapper import RslRlVecEnvWrapper

source/isaaclab_rl/isaaclab_rl/rsl_rl/rl_cfg.py

Lines changed: 26 additions & 1 deletion
@@ -8,6 +8,9 @@

 from isaaclab.utils import configclass

+from .rnd_cfg import RslRlRndCfg
+from .symmetry_cfg import RslRlSymmetryCfg
+

 @configclass
 class RslRlPpoActorCriticCfg:
@@ -19,6 +22,9 @@ class RslRlPpoActorCriticCfg:
     init_noise_std: float = MISSING
     """The initial noise standard deviation for the policy."""

+    noise_std_type: Literal["scalar", "log"] = "scalar"
+    """The type of noise standard deviation for the policy. Default is scalar."""
+
     actor_hidden_dims: list[int] = MISSING
     """The hidden dimensions of the actor network."""

@@ -72,6 +78,21 @@ class RslRlPpoAlgorithmCfg:
     max_grad_norm: float = MISSING
     """The maximum gradient norm."""

+    normalize_advantage_per_mini_batch: bool = False
+    """Whether to normalize the advantage per mini-batch. Default is False.
+
+    If True, the advantage is normalized over the entire collected trajectories.
+    Otherwise, the advantage is normalized over the mini-batches only.
+    """
+
+    symmetry_cfg: RslRlSymmetryCfg | None = None
+    """The symmetry configuration. Default is None, in which case symmetry is not used."""
+
+    rnd_cfg: RslRlRndCfg | None = None
+    """The configuration for the Random Network Distillation (RND) module. Default is None,
+    in which case RND is not used.
+    """
+

 @configclass
 class RslRlOnPolicyRunnerCfg:
@@ -99,7 +120,11 @@ class RslRlOnPolicyRunnerCfg:
     """The algorithm configuration."""

     clip_actions: float | None = None
-    """The clipping value for actions. If ``None``, then no clipping is done."""
+    """The clipping value for actions. If ``None``, then no clipping is done.
+
+    .. note::
+        This clipping is performed inside the :class:`RslRlVecEnvWrapper` wrapper.
+    """

     ##
     # Checkpointing parameters
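For illustration (not part of the commit): a task's RSL-RL agent configuration could opt into the new fields roughly as sketched below. Only names visible in this commit are used, and the `policy`/`algorithm` attributes are assumed to follow the runner config's usual layout. `RslRlSymmetryCfg` and `RslRlRndCfg` are default-constructed because their fields are not shown in this diff; a real setup would still need to populate them (e.g. the symmetry augmentation function expected by RSL-RL).

```python
# Hedged sketch: switching on the options introduced in this commit for an
# existing RSL-RL runner configuration. Sub-config fields are not part of this
# diff, so they are left at their defaults here.
from isaaclab_rl.rsl_rl import RslRlOnPolicyRunnerCfg, RslRlRndCfg, RslRlSymmetryCfg


def enable_new_rsl_rl_options(agent_cfg: RslRlOnPolicyRunnerCfg) -> RslRlOnPolicyRunnerCfg:
    agent_cfg.policy.noise_std_type = "log"                # new field on RslRlPpoActorCriticCfg
    agent_cfg.algorithm.normalize_advantage_per_mini_batch = True
    agent_cfg.algorithm.symmetry_cfg = RslRlSymmetryCfg()  # enable symmetry-based augmentation/loss
    agent_cfg.algorithm.rnd_cfg = RslRlRndCfg()            # enable curiosity via RND
    return agent_cfg
```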

0 commit comments