Error related to IsaacSim #343
-
I am trying to train a custom environment with skrl, but the error "ModuleNotFoundError: No module named 'omni.kit'" occurs. What could be causing it, and can you give me some advice? Here is my code:
import torch
import torch.nn as nn
# import the skrl components to build the RL system
from skrl.agents.torch.ppo import PPO_DEFAULT_CONFIG
from skrl.agents.torch.ppo import PPO_RNN as PPO
from skrl.envs.loaders.torch import load_isaaclab_env
from skrl.envs.wrappers.torch import wrap_env
from skrl.memories.torch import RandomMemory
from skrl.models.torch import DeterministicMixin, CategoricalMixin, Model
from skrl.resources.preprocessors.torch import RunningStandardScaler
from skrl.resources.schedulers.torch import KLAdaptiveRL
from skrl.trainers.torch import SequentialTrainer
from skrl.utils import set_seed
import VAE_navigation.tasks.direct.vae_navigation
import numpy as np
import torch
import torch.nn as nn
import gymnasium as gym
# import the skrl components to build the RL system
from skrl.agents.torch.ppo import PPO_DEFAULT_CONFIG
from skrl.agents.torch.ppo import PPO_RNN as PPO
from skrl.envs.wrappers.torch import wrap_env
from skrl.memories.torch import RandomMemory
from skrl.models.torch import DeterministicMixin, GaussianMixin, Model
from skrl.resources.preprocessors.torch import RunningStandardScaler
from skrl.resources.schedulers.torch import KLAdaptiveRL
from skrl.trainers.torch import SequentialTrainer
from skrl.utils import set_seed
# import isaaclab_tasks # noqa: F401
import argparse
from isaaclab.app import AppLauncher
"""Rest everything follows."""
import gymnasium as gym
import os
import time
import torch
import skrl
from packaging import version
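# NOTE: the Isaac Lab modules below are imported before the environment (and the simulation app) is created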
from isaaclab.envs import DirectMARLEnv, multi_agent_to_single_agent
from isaaclab.utils.dict import print_dict
from isaaclab.utils.pretrained_checkpoint import get_published_pretrained_checkpoint
from isaaclab_rl.skrl import SkrlVecEnvWrapper
import isaaclab_tasks # noqa: F401
from isaaclab_tasks.utils import get_checkpoint_path, load_cfg_from_registry, parse_env_cfg
import VAE_navigation.tasks # noqa: F401
# seed for reproducibility
set_seed(42) # e.g. `set_seed(42)` for fixed seed
# define models (stochastic and deterministic models) using mixins
class Policy(GaussianMixin, Model):
def __init__(self, observation_space, action_space, device, clip_actions=False,
clip_log_std=True, min_log_std=-20, max_log_std=2, reduction="sum"):
Model.__init__(self, observation_space, action_space, device)
GaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std, reduction)
self.net = nn.Sequential(nn.Linear(self.num_observations, 512),
nn.ELU(),
nn.Linear(512, 256),
nn.ELU(),
nn.Linear(256, 64),
nn.ELU(),
nn.Linear(64, self.num_actions))
self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions))
def compute(self, inputs, role):
return torch.tanh(self.net(inputs["states"])), self.log_std_parameter, {}
class Value(DeterministicMixin, Model):
def __init__(self, observation_space, action_space, device, clip_actions=False):
Model.__init__(self, observation_space, action_space, device)
DeterministicMixin.__init__(self, clip_actions)
self.net = nn.Sequential(nn.Linear(self.num_observations, 512),
nn.ELU(),
nn.Linear(512, 256),
nn.ELU(),
nn.Linear(256, 64),
nn.ELU(),
nn.Linear(64, 1))
def compute(self, inputs, role):
return self.net(inputs["states"]), {}
# load and wrap the Isaac Lab environment
env = load_isaaclab_env(task_name="Template-Vae-Navigation-Direct-v0")
env = wrap_env(env)
device = env.device
# instantiate a memory as rollout buffer (any memory can be used for this)
memory = RandomMemory(memory_size=1024, num_envs=env.num_envs, device=device)
# instantiate the agent's models (function approximators).
# PPO requires 2 models, visit its documentation for more details
# https://skrl.readthedocs.io/en/latest/api/agents/ppo.html#models
models = {}
models["policy"] = Policy(
observation_space=env.observation_space,
action_space=env.action_space,
device=device,
num_envs=env.num_envs,
)
models["value"] = Value(
observation_space=env.observation_space,
action_space=env.action_space,
device=device,
num_envs=env.num_envs,
)
# configure and instantiate the agent (visit its documentation to see all the options)
# https://skrl.readthedocs.io/en/latest/api/agents/ppo.html#configuration-and-hyperparameters
cfg = PPO_DEFAULT_CONFIG.copy()
cfg["rollouts"] = 1024 # memory_size
cfg["learning_epochs"] = 10
cfg["mini_batches"] = 4 # 16 * 1024 / 4096
cfg["discount_factor"] = 0.99
cfg["lambda"] = 0.95
cfg["learning_rate"] = 1e-3
cfg["learning_rate_scheduler"] = KLAdaptiveRL
cfg["learning_rate_scheduler_kwargs"] = {"kl_threshold": 0.008}
cfg["random_timesteps"] = 0
cfg["learning_starts"] = 0
cfg["grad_norm_clip"] = 0.5
cfg["ratio_clip"] = 0.2
cfg["value_clip"] = 0.2
cfg["clip_predicted_values"] = True
cfg["entropy_loss_scale"] = 0.0
cfg["value_loss_scale"] = 1.0
cfg["kl_threshold"] = 0
cfg["rewards_shaper"] = lambda rewards, *args, **kwargs: rewards * 0.1
cfg["time_limit_bootstrap"] = True
cfg["state_preprocessor"] = RunningStandardScaler
cfg["state_preprocessor_kwargs"] = {"size": env.observation_space, "device": device}
cfg["value_preprocessor"] = RunningStandardScaler
cfg["value_preprocessor_kwargs"] = {"size": 1, "device": device}
# logging to TensorBoard and write checkpoints (in timesteps)
cfg["experiment"]["write_interval"] = 50
cfg["experiment"]["checkpoint_interval"] = 500
cfg["experiment"]["directory"] = "runs/torch/AOD-Float-Discrete-v0-GRU"
agent = PPO(
models=models,
memory=memory,
cfg=cfg,
observation_space=env.observation_space,
action_space=env.action_space,
device=device,
)
# configure and instantiate the RL trainer
cfg_trainer = {"timesteps": 20000, "headless": True}
trainer = SequentialTrainer(cfg=cfg_trainer, env=env, agents=agent)
# start training
trainer.train()
-
Hi @giangdao1402
Move all IsaacLab-related imports after loading and wrapping the environment:
# load and wrap the Isaac Lab environment
env = load_isaaclab_env(task_name="Template-Vae-Navigation-Direct-v0")
env = wrap_env(env)
# ISAACLAB-RELATED IMPORTS HERE
Related to https://docs.isaacsim.omniverse.nvidia.com/latest/python_scripting/manual_standalone_python.html#simulationapp
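For context, a minimal sketch of that ordering (assuming, per the linked SimulationApp documentation, that omni.* and isaaclab.* modules only become importable once the simulation app is running, and that load_isaaclab_env launches it internally):
# skrl imports do not need the simulation app, so they can stay at the top
from skrl.envs.loaders.torch import load_isaaclab_env
from skrl.envs.wrappers.torch import wrap_env
# loading the environment starts the Omniverse app, so it must come first
env = load_isaaclab_env(task_name="Template-Vae-Navigation-Direct-v0")
env = wrap_env(env)
# only now import the modules that depend on omni.kit / the running app, e.g.
from isaaclab.utils.dict import print_dict
from isaaclab_rl.skrl import SkrlVecEnvWrapper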
-
Hi, thanks for your suggestion. I did try to import all the isaaclab-related libraries after wrapping the environment, but the error still occurs. Am I doing it right, or is something still wrong?
# from isaaclab.app import AppLauncher
# launch omniverse app
# app_launcher = AppLauncher()
# simulation_app = app_launcher.app
import torch
import torch.nn as nn
# import the skrl components to build the RL system
from skrl.agents.torch.ppo import PPO_DEFAULT_CONFIG
from skrl.agents.torch.ppo import PPO_RNN as PPO
from skrl.envs.loaders.torch import load_isaaclab_env
from skrl.envs.wrappers.torch import wrap_env
from skrl.memories.torch import RandomMemory
from skrl.models.torch import DeterministicMixin, CategoricalMixin, Model
from skrl.resources.preprocessors.torch import RunningStandardScaler
from skrl.resources.schedulers.torch import KLAdaptiveRL
from skrl.trainers.torch import SequentialTrainer
from skrl.utils import set_seed
import numpy as np
import torch
import torch.nn as nn
import gymnasium as gym
# import the skrl components to build the RL system
from skrl.agents.torch.ppo import PPO_DEFAULT_CONFIG
from skrl.agents.torch.ppo import PPO_RNN as PPO
from skrl.envs.wrappers.torch import wrap_env
from skrl.memories.torch import RandomMemory
from skrl.models.torch import DeterministicMixin, GaussianMixin, Model
from skrl.resources.preprocessors.torch import RunningStandardScaler
from skrl.resources.schedulers.torch import KLAdaptiveRL
from skrl.trainers.torch import SequentialTrainer
from skrl.utils import set_seed
"""Rest everything follows."""
import gymnasium as gym
import os
import time
import torch
import skrl
from packaging import version
# seed for reproducibility
set_seed(42) # e.g. `set_seed(42)` for fixed seed
# define models (stochastic and deterministic models) using mixins
class Policy(GaussianMixin, Model):
def __init__(self, observation_space, action_space, device, clip_actions=False,
clip_log_std=True, min_log_std=-20, max_log_std=2, reduction="sum"):
Model.__init__(self, observation_space, action_space, device)
GaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std, reduction)
self.net = nn.Sequential(nn.Linear(self.num_observations, 512),
nn.ELU(),
nn.Linear(512, 256),
nn.ELU(),
nn.Linear(256, 64),
nn.ELU(),
nn.Linear(64, self.num_actions))
self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions))
def compute(self, inputs, role):
return torch.tanh(self.net(inputs["states"])), self.log_std_parameter, {}
class Value(DeterministicMixin, Model):
def __init__(self, observation_space, action_space, device, clip_actions=False):
Model.__init__(self, observation_space, action_space, device)
DeterministicMixin.__init__(self, clip_actions)
self.net = nn.Sequential(nn.Linear(self.num_observations, 512),
nn.ELU(),
nn.Linear(512, 256),
nn.ELU(),
nn.Linear(256, 64),
nn.ELU(),
nn.Linear(64, 1))
def compute(self, inputs, role):
return self.net(inputs["states"]), {}
import VAE_navigation.tasks.direct.vae_navigation
# load and wrap the Isaac Lab environment
env = load_isaaclab_env(task_name="Template-Vae-Navigation-Direct-v0")
env = wrap_env(env)
from isaaclab.envs import DirectMARLEnv, multi_agent_to_single_agent
from isaaclab.utils.dict import print_dict
from isaaclab.utils.pretrained_checkpoint import get_published_pretrained_checkpoint
from isaaclab_rl.skrl import SkrlVecEnvWrapper
import isaaclab_tasks # noqa: F401
from isaaclab_tasks.utils import get_checkpoint_path, load_cfg_from_registry, parse_env_cfg
import VAE_navigation.tasks # noqa: F401
device = env.device
# instantiate a memory as rollout buffer (any memory can be used for this)
memory = RandomMemory(memory_size=1024, num_envs=env.num_envs, device=device)
# instantiate the agent's models (function approximators).
# PPO requires 2 models, visit its documentation for more details
# https://skrl.readthedocs.io/en/latest/api/agents/ppo.html#models
models = {}
models["policy"] = Policy(
observation_space=env.observation_space,
action_space=env.action_space,
device=device,
num_envs=env.num_envs,
)
models["value"] = Value(
observation_space=env.observation_space,
action_space=env.action_space,
device=device,
num_envs=env.num_envs,
)
# configure and instantiate the agent (visit its documentation to see all the options)
# https://skrl.readthedocs.io/en/latest/api/agents/ppo.html#configuration-and-hyperparameters
cfg = PPO_DEFAULT_CONFIG.copy()
cfg["rollouts"] = 1024 # memory_size
cfg["learning_epochs"] = 10
cfg["mini_batches"] = 4 # 16 * 1024 / 4096
cfg["discount_factor"] = 0.99
cfg["lambda"] = 0.95
cfg["learning_rate"] = 1e-3
cfg["learning_rate_scheduler"] = KLAdaptiveRL
cfg["learning_rate_scheduler_kwargs"] = {"kl_threshold": 0.008}
cfg["random_timesteps"] = 0
cfg["learning_starts"] = 0
cfg["grad_norm_clip"] = 0.5
cfg["ratio_clip"] = 0.2
cfg["value_clip"] = 0.2
cfg["clip_predicted_values"] = True
cfg["entropy_loss_scale"] = 0.0
cfg["value_loss_scale"] = 1.0
cfg["kl_threshold"] = 0
cfg["rewards_shaper"] = lambda rewards, *args, **kwargs: rewards * 0.1
cfg["time_limit_bootstrap"] = True
cfg["state_preprocessor"] = RunningStandardScaler
cfg["state_preprocessor_kwargs"] = {"size": env.observation_space, "device": device}
cfg["value_preprocessor"] = RunningStandardScaler
cfg["value_preprocessor_kwargs"] = {"size": 1, "device": device}
# logging to TensorBoard and write checkpoints (in timesteps)
cfg["experiment"]["write_interval"] = 50
cfg["experiment"]["checkpoint_interval"] = 500
cfg["experiment"]["directory"] = "runs/torch/AOD-Float-Discrete-v0-GRU"
agent = PPO(
models=models,
memory=memory,
cfg=cfg,
observation_space=env.observation_space,
action_space=env.action_space,
device=device,
)
# configure and instantiate the RL trainer
cfg_trainer = {"timesteps": 20000, "headless": True}
trainer = SequentialTrainer(cfg=cfg_trainer, env=env, agents=agent)
# start training
trainer.train()
# simulation_app.close()
# # ---------------------------------------------------------
# # comment the code above: `trainer.train()`, and...
# # uncomment the following lines to evaluate a trained agent
# # ---------------------------------------------------------
# from skrl.utils.huggingface import download_model_from_huggingface
# # download the trained agent's checkpoint from Hugging Face Hub and load it
# path = download_model_from_huggingface("skrl/IsaacOrbit-Isaac-Ant-v0-PPO", filename="agent.pt")
# agent.load(path)
# # start evaluation
# trainer.eval()
-
In case anyone encounters this error: just load your environment using
cfg = VaeNavigationEnvCfg()
env = VaeNavigationEnv(cfg=cfg)
env = wrap_env(env)
It works for me. But using
env = load_isaaclab_env(task_name="Template-Vae-Navigation-Direct-v0")
env = wrap_env(env)
will create the error.
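Put together, a minimal sketch of that workaround (assuming the VaeNavigationEnv and VaeNavigationEnvCfg classes live in the VAE_navigation.tasks.direct.vae_navigation module imported in the question, and that the Omniverse app still needs to be launched before any isaaclab/omni imports, e.g. via AppLauncher):
from isaaclab.app import AppLauncher
# launch the Omniverse app before importing anything that needs omni.kit
app_launcher = AppLauncher(headless=True)
simulation_app = app_launcher.app
from skrl.envs.wrappers.torch import wrap_env
# hypothetical import path, based on the module used in the question
from VAE_navigation.tasks.direct.vae_navigation import VaeNavigationEnv, VaeNavigationEnvCfg
cfg = VaeNavigationEnvCfg()
env = VaeNavigationEnv(cfg=cfg)
env = wrap_env(env)
# ...then build models, memory, agent and trainer as above, and call trainer.train()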