diff --git a/examples/scripts/reinforcement_learning/simple_cart_pole/README.md b/examples/scripts/reinforcement_learning/simple_cart_pole/README.md new file mode 100644 index 0000000000..b26d0a247b --- /dev/null +++ b/examples/scripts/reinforcement_learning/simple_cart_pole/README.md @@ -0,0 +1,55 @@ +# Example for Reinforcement Learning (RL) With Gazebo + +This demo world shows you an example of how you can use SDFormat, Stable Baselines 3 and Gazebo to perform RL with python. +We start with a very simple cart-pole world. This world is defined in our sdf file `cart_pole.sdf`. It is analogous to the cart-pole world in gymnasium. + +## Create a VENV + +First create a virtual environment using python, +``` +python3 -m venv venv +``` +Let's activate it and install stablebaselines3 and pytorch. +``` +. venv/bin/activate +``` + +Lets install our dependencies +``` +pip install stable-baselines3[extra] +``` +For visuallization to work you will also need to: +``` +pip uninstall opencv-python +pip install opencv-python-headless +``` +This is because `opencv-python` brings in Qt5 by default. + +In the same terminal you should add your gazebo python install directory to the `PYTHONPATH`. +If you built gazebo from source in the current working directory this would be: +``` +# cd to the colcon workspace where you built gazebo +cd +export PYTHONPATH=$PYTHONPATH:`pwd`/install/lib/python +. install/setup.bash +``` + +You will also need to set PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION to python due to version +mismatches. +``` +export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python +``` + + +## Exploring the environment + +You can see the environment by using `gz sim cart_pole.sdf`. + +## Perform RL + +To perform RL take a look at `cart_pole_env.py`. We simply subclass `gym.Env` and +create a new gazebo system. Close any instance of gazebo you may be running. +To run the script, in your terminal with the venv sourced run: +``` +python3 cart_pole_env.py +``` diff --git a/examples/scripts/reinforcement_learning/simple_cart_pole/cart_pole.sdf b/examples/scripts/reinforcement_learning/simple_cart_pole/cart_pole.sdf new file mode 100644 index 0000000000..5ab14989a6 --- /dev/null +++ b/examples/scripts/reinforcement_learning/simple_cart_pole/cart_pole.sdf @@ -0,0 +1,356 @@ + + + + + + + 0.001 + 1.0 + + + + + + + + + + true + 0 0 10 0 0 0 + 1 1 1 1 + 0.5 0.5 0.5 1 + + 1000 + 0.9 + 0.01 + 0.001 + + -0.5 0.1 -0.9 + + + + true + + + + + 0 0 1 + 100 100 + + + + + + + 0 0 1 + 100 100 + + + + 0.8 0.8 0.8 1 + 0.8 0.8 0.8 1 + 0.8 0.8 0.8 1 + + + + + + + 0 0 0.325 0 -0 0 + + + -0.151427 -0 1.5 0 -0 0 + + 0.1 + + 0.126164 + 0 + 0 + 0.416519 + 0 + 0.481014 + + + + + + 0.2 0.2 1.5 + + + + 0.5 1.0 0.5 1 + 0.5 1.0 0.5 1 + 0.0 1.0 0.0 1 + + + + + + 0.2 0.2 1.5 + + + + + + + -0.151427 0 2.2 0 -0 0 + + 10.0 + + 1.26164 + 0 + 0 + 4.16519 + 0 + 4.81014 + + + + + + 0.3 0.3 0.3 + + + + 0.5 1.0 0.5 1 + 0.5 1.0 0.5 1 + 0.0 1.0 0.0 1 + + + + + + 0.3 0.3 0.3 + + + + + + + -0.151427 -0 0.175 0 -0 0 + + 1.14395 + + 0.126164 + 0 + 0 + 0.416519 + 0 + 0.481014 + + + + + + 2.01142 1 0.568726 + + + + 0.5 1.0 0.5 1 + 0.5 1.0 0.5 1 + 0.0 1.0 0.0 1 + + + + + + 2.01142 1 0.568726 + + + + + + + 0.554283 0.625029 -0.025 -1.5707 0 0 + + 2 + + 0.145833 + 0 + 0 + 0.145833 + 0 + 0.125 + + + + + + 0.3 + + + + 0.2 0.2 0.2 1 + 0.2 0.2 0.2 1 + 0.2 0.2 0.2 1 + + + + + + 0.3 + + + + + + 1 + 1 + 0.035 + 0 + 0 0 1 + + + 1 + 1 + 0.1 + + + + + + + + 0.554282 -0.625029 -0.025 -1.5707 0 0 + + 2 + + 0.145833 + 0 + 0 + 0.145833 + 0 + 0.125 + + + + + + 0.3 + + + + 0.2 0.2 0.2 1 + 0.2 0.2 0.2 1 + 0.2 0.2 0.2 1 + + + + + + 0.3 + + + + + + 1 + 1 + 0.035 + 0 + 0 0 1 + + + 1 + 1 + 0.1 + + + + + + + + -0.957138 -0 -0.125 0 -0 0 + + 1 + + 0.1 + 0 + 0 + 0.1 + 0 + 0.1 + + + + + + 0.2 + + + + 0.2 0.2 0.2 1 + 0.2 0.2 0.2 1 + 0.2 0.2 0.2 1 + + + + + + 0.2 + + + + + + + chassis + left_wheel + + 0 0 1 + + -1.79769e+308 + 1.79769e+308 + + + + + + chassis + right_wheel + + 0 0 1 + + -1.79769e+308 + 1.79769e+308 + + + + + + chassis + caster + + + + 0 0 -0.75 0 0 0 + chassis + pole + + 0 1 0 + + -1.79769e+308 + 1.79769e+308 + + + + + + pole + pole_mass + + + + + + diff --git a/examples/scripts/reinforcement_learning/simple_cart_pole/cart_pole_env.py b/examples/scripts/reinforcement_learning/simple_cart_pole/cart_pole_env.py new file mode 100644 index 0000000000..4a6fb2bdbb --- /dev/null +++ b/examples/scripts/reinforcement_learning/simple_cart_pole/cart_pole_env.py @@ -0,0 +1,149 @@ + +import os +import gymnasium as gym +import numpy as np + +from gz.common6 import set_verbosity +from gz.sim import TestFixture, World, world_entity, Model, Link, get_install_prefix +from gz.math8 import Vector3d + +from stable_baselines3 import PPO +import time +import subprocess + +file_path = os.path.dirname(os.path.realpath(__file__)) + +def run_gui(): + """ + This function looks for your gz sim installation and looks for + an instance of the gui client + """ + if os.name == 'nt': + base = os.path.join(get_install_prefix(), "libexec", "runGui.exe") + else: + base = os.path.join(get_install_prefix(), "libexec", "runGui") + subprocess.Popen(base) + +class GzRewardScorer: + """ + This Gazebo System is used to introspect and score the world. + """ + def __init__(self): + """ + We initiallize a TestFixture: This is a simple fixture that is used + to load our gazebo world. We also inject the code to be executed + on each run. + """ + self.fixture = TestFixture(os.path.join(file_path, 'cart_pole.sdf')) + self.fixture.on_pre_update(self.on_pre_update) + self.fixture.on_post_update(self.on_post_update) + self.command = None # This variable is used as a bridge between Gymnasium and gazebo + self.fixture.finalize() + self.server = self.fixture.server() + self.terminated = False + + def on_pre_update(self, info, ecm): + """ + on_pre_update is used to command the model vehicle. + """ + world = World(world_entity(ecm)) + self.model = Model(world.model_by_name(ecm, "vehicle_green")) + self.pole_entity = self.model.link_by_name(ecm, "pole") + self.chassis_entity = self.model.link_by_name(ecm, "chassis") + self.pole = Link(self.pole_entity) + self.pole.enable_velocity_checks(ecm) + self.chassis = Link(self.chassis_entity) + self.chassis.enable_velocity_checks(ecm) + if self.command == 1: + self.chassis.add_world_force(ecm, Vector3d(2000, 0, 0)) + elif self.command == 0: + self.chassis.add_world_force(ecm, Vector3d(-2000, 0, 0)) + + def on_post_update(self, info, ecm): + """ + on_post_update is used to read the current state of the world. We write the + state to a local field. + """ + pole_pose = self.pole.world_pose(ecm).rot().euler().y() + if self.pole.world_angular_velocity(ecm) is not None: + pole_angular_vel = self.pole.world_angular_velocity(ecm).y() + else: + pole_angular_vel = 0 + print("Warning failed to get angular velocity") + cart_pose = self.chassis.world_pose(ecm).pos().x() + cart_vel = self.chassis.world_linear_velocity(ecm) + + if cart_vel is not None: + cart_vel = cart_vel.x() + else: + cart_vel = 0 + print("Warning failed to get cart velocity") + # Write the state to the environment + self.state = np.array([cart_pose, cart_vel, pole_pose, pole_angular_vel], dtype=np.float32) + if not self.terminated: + self.terminated = pole_pose > 0.48 or pole_pose < -0.48 or cart_pose > 4.8 or cart_pose < -4.8 + + if self.terminated: + self.reward = 0.0 + else: + self.reward = 1.0 + + def step(self, action, paused=False): + """ + Execute the server. + + There is a bit of nuance in this instance, + our environment has control over every 5 simulation steps. + We block the server till those 5 steps are completed. + """ + self.command = action + self.server.run(True, 5, paused) + obs = self.state + reward = self.reward + return obs, reward, self.terminated, False, {} + + def reset(self): + """ + This function simply resets the server + """ + self.server.reset_all() + self.command = None + self.terminated = False + obs, reward_, term_, tunc_, other_= self.step(None, paused=False) + return obs, {} + + + +class CustomCartPole(gym.Env): + """ + Wrapper around GzRewardScorer that adapts the reward scorer to work with + gymnasium. + """ + def __init__(self, env_config): + self.env = GzRewardScorer() + self.action_space = gym.spaces.Discrete(2) + self.observation_space = gym.spaces.Box( + np.array([-10, float("-inf"), -0.418, -3.4028235e+38]), + np.array([10, float("inf"), 0.418, 3.4028235e+38]), + (4,), np.float32) + + def reset(self, seed=123): + return self.env.reset() + + def step(self, action): + obs, reward, done, truncated, info = self.env.step(action) + return obs, reward, done, truncated, info + +env = CustomCartPole({}) +model = PPO("MlpPolicy", env, verbose=1) +model.learn(total_timesteps=25_000) + +vec_env = model.get_env() +obs = vec_env.reset() + +# Spawn your GUI and see the model you inferred +run_gui() +time.sleep(10) +for i in range(50000): + action, _state = model.predict(obs, deterministic=True) + obs, reward, done, info = vec_env.step(action) diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index 6200b1843a..b47a2d771a 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -42,6 +42,7 @@ pybind11_add_module(${BINDINGS_MODULE_NAME} MODULE src/gz/sim/UpdateInfo.cc src/gz/sim/Util.cc src/gz/sim/World.cc + src/gz/sim/InstallationDirectories.cc ) target_link_libraries(${BINDINGS_MODULE_NAME} PRIVATE diff --git a/python/src/gz/sim/InstallationDirectories.cc b/python/src/gz/sim/InstallationDirectories.cc new file mode 100644 index 0000000000..e73b89214d --- /dev/null +++ b/python/src/gz/sim/InstallationDirectories.cc @@ -0,0 +1,68 @@ +/* + * Copyright (C) 2025 Open Source Robotics Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +#include + +#include + +#include "InstallationDirectories.hh" + +namespace gz +{ +namespace sim +{ +namespace python +{ +void defineSimInstallationDirectories(pybind11::module &_module) +{ + + _module.def("get_install_prefix", [](){ + return getInstallPrefix(); + }, + "getInstallPrefix return the install prefix of the library") + .def("get_gui_config_path", [](){ + return getGUIConfigPath(); + }, + "getGUIConfigPath return the GUI config path") + .def("get_system_config_path", [](){ + return getSystemConfigPath(); + }, + "getSystemConfigPath return the system config path") + .def("get_server_config_path", [](){ + return getServerConfigPath(); + }, + "getServerConfigPath return the server config path") + .def("get_plugin_install_dir", [](){ + return getPluginInstallDir(); + }, + "getPluginInstallDir return the plugin install dir") + .def("get_gui_plugin_install_dir", [](){ + return getGUIPluginInstallDir(); + }, + "getGUIPluginInstallDir return the GUI plugin install dir") + .def("get_world_install_dir", [](){ + return getWorldInstallDir(); + }, + "getWorldInstallDir return the world install dir") + .def("get_media_install_dir", [](){ + return getMediaInstallDir(); + }, + "getMediaInstallDir return the media install dir"); +} +} // namespace python +} // namespace sim +} // namespace gz diff --git a/python/src/gz/sim/InstallationDirectories.hh b/python/src/gz/sim/InstallationDirectories.hh new file mode 100644 index 0000000000..621f898636 --- /dev/null +++ b/python/src/gz/sim/InstallationDirectories.hh @@ -0,0 +1,39 @@ +/* + * Copyright (C) 2025 Open Source Robotics Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +#ifndef GZ_SIM_PYTHON__INSTALLATION_DIRECTORIES_HH_ +#define GZ_SIM_PYTHON__INSTALLATION_DIRECTORIES_HH_ + +#include + +namespace gz +{ +namespace sim +{ +namespace python +{ +/// Define a pybind11 wrapper for a gz::sim::Server +/** + * \param[in] module a pybind11 module to add the definition to + */ +void +defineSimInstallationDirectories(pybind11::module &_module); +} // namespace python +} // namespace sim +} // namespace gz + +#endif // GZ_SIM_PYTHON__INSTALLATION_DIRECTORIES_HH_ diff --git a/python/src/gz/sim/_gz_sim_pybind11.cc b/python/src/gz/sim/_gz_sim_pybind11.cc index acf9373dc5..fea576a161 100644 --- a/python/src/gz/sim/_gz_sim_pybind11.cc +++ b/python/src/gz/sim/_gz_sim_pybind11.cc @@ -21,6 +21,7 @@ #include "Actor.hh" #include "EntityComponentManager.hh" #include "EventManager.hh" +#include "InstallationDirectories.hh" #include "Joint.hh" #include "Light.hh" #include "Link.hh" @@ -39,6 +40,7 @@ PYBIND11_MODULE(BINDINGS_MODULE_NAME, m) { gz::sim::python::defineSimActor(m); gz::sim::python::defineSimEntityComponentManager(m); gz::sim::python::defineSimEventManager(m); + gz::sim::python::defineSimInstallationDirectories(m); gz::sim::python::defineSimJoint(m); gz::sim::python::defineSimLight(m); gz::sim::python::defineSimLink(m); diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 54345d0b5d..b802c0d880 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -164,6 +164,11 @@ if (MSVC) set_source_files_properties(${sources} ${gtest_sources} COMPILE_FLAGS "/wd4251 /wd4146") endif() +# Executable target that runs the GUI without ruby for debugging purposes. +add_executable(runGui cmd/runGui_main.cc) +target_link_libraries(runGui PRIVATE ${gz_lib_target} gz-gui gz) +install(TARGETS runGui DESTINATION ${CMAKE_INSTALL_PREFIX}/libexec) + # Create the library target gz_create_core_library(SOURCES ${sources} CXX_STANDARD 17) gz_add_get_install_prefix_impl(GET_INSTALL_PREFIX_FUNCTION gz::sim::getInstallPrefix