Unverified Commit e9862d42 authored by Toni-SM's avatar Toni-SM Committed by GitHub

Integration of the skrl RL library (#6)

* Add environment wrapper for the skrl RL library
* Add training workflow for the skrl RL library
* Add training configuration .yaml files for the skrl RL library
* Add evaluation workflow for the skrl RL library
* Add skrl to RL extra dependencies
* Increase MINOR version and update CHANGELOG
* Add skrl LICENSE to docs dependencies
parent 576b1ea4
......@@ -109,6 +109,7 @@ autodoc_mock_imports = [
"omni.isaac.kit",
"omni.isaac.cloner",
"gym",
"skrl",
"stable_baselines3",
"rsl_rl",
"rl_games",
......
MIT License
Copyright (c) 2021 Toni-SM
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
......@@ -21,6 +21,7 @@ environment instance to the wrapper constructor. For instance, to wrap an enviro
.. _RL-Games: https://github.com/Denys88/rl_games
.. _RSL-RL: https://github.com/leggedrobotics/rsl_rl
.. _skrl: https://github.com/Toni-SM/skrl
.. _Stable-Baselines3: https://github.com/DLR-RM/stable-baselines3
......@@ -38,6 +39,13 @@ RSL-RL Wrapper
:members:
:show-inheritance:
SKRL Wrapper
------------
.. automodule:: omni.isaac.orbit_envs.utils.wrappers.skrl
:members:
:show-inheritance:
Stable-Baselines3 Wrapper
-------------------------
......
......@@ -164,6 +164,18 @@ from the environments into the respective libraries function argument and return
# run script for playing with 32 environments
./orbit.sh -p source/standalone/workflows/sb3/play.py --task Isaac-Cartpole-v0 --num_envs 32 --checkpoint /PATH/TO/model.zip
- Training an agent with
`SKRL <https://skrl.readthedocs.io>`__ on ``Isaac-Reach-Franka-v0``:
.. code:: bash
# install python module (for skrl)
./orbit.sh -p -m pip install -e 'source/extensions/omni.isaac.orbit_envs[skrl]'
# run script for training
./orbit.sh -p source/standalone/workflows/skrl/train.py --task Isaac-Reach-Franka-v0 --headless
# run script for playing with 32 environments
./orbit.sh -p source/standalone/workflows/skrl/play.py --task Isaac-Reach-Franka-v0 --num_envs 32 --checkpoint /PATH/TO/model.pt
- Training an agent with
`RL-Games <https://github.com/Denys88/rl_games>`__ on ``Isaac-Ant-v0``:
......@@ -188,7 +200,6 @@ from the environments into the respective libraries function argument and return
# run script for playing with 32 environments
./orbit.sh -p source/standalone/workflows/rsl_rl/play.py --task Isaac-Reach-Franka-v0 --num_envs 32 --checkpoint /PATH/TO/model.pth
All the scripts above log the training progress to `Tensorboard`_ in the ``logs`` directory in the root of
the repository. The logs directory follows the pattern ``logs/<library>/<task>/<date-time>``, where ``<library>``
is the name of the learning framework, ``<task>`` is the task name, and ``<date-time>`` is the timestamp at
......
......@@ -8,7 +8,7 @@ in a vectorized fashion, they can only be used with frameworks that support vect
Many common frameworks come with their own desired definitions of a vectorized environment and require the returned data
to follow their supported data types and data structures. For example, ``stable-baselines3`` uses ``numpy`` arrays, while
``rsl-rl`` or ``rl-games`` use ``torch.Tensor``. We provide wrappers for these different frameworks, which can be found
``rsl-rl``, ``rl-games``, or ``skrl`` use ``torch.Tensor``. We provide wrappers for these different frameworks, which can be found
in the ``omni.isaac.orbit_envs.utils.wrappers`` module.
......
[package]
# Note: Semantic Versioning is used: https://semver.org/
version = "0.1.1"
version = "0.2.0"
# Description
title = "ORBIT Environments"
......
seed: 42
# Models are instantiated using skrl's model instantiator utility
# https://skrl.readthedocs.io/en/develop/modules/skrl.utils.model_instantiators.html
models:
separate: False
policy: # see skrl.utils.model_instantiators.gaussian_model for parameter details
clip_actions: True
clip_log_std: True
min_log_std: -20.0
max_log_std: 2.0
input_shape: "Shape.STATES"
hiddens: [256, 128, 64]
hidden_activation: ["elu", "elu", "elu"]
output_shape: "Shape.ACTIONS"
output_activation: "tanh"
output_scale: 1.0
value: # see skrl.utils.model_instantiators.deterministic_model for parameter details
clip_actions: False
input_shape: "Shape.STATES"
hiddens: [256, 128, 64]
hidden_activation: ["elu", "elu", "elu"]
output_shape: "Shape.ONE"
output_activation: ""
output_scale: 1.0
# PPO agent configuration (field names are from PPO_DEFAULT_CONFIG)
# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ppo.html
agent:
rollouts: 16
learning_epochs: 8
mini_batches: 4
discount_factor: 0.99
lambda: 0.95
learning_rate: 3.e-4
learning_rate_scheduler: "KLAdaptiveRL"
learning_rate_scheduler_kwargs:
kl_threshold: 0.008
state_preprocessor: "RunningStandardScaler"
state_preprocessor_kwargs: null
value_preprocessor: "RunningStandardScaler"
value_preprocessor_kwargs: null
random_timesteps: 0
learning_starts: 0
grad_norm_clip: 1.0
ratio_clip: 0.2
value_clip: 0.2
clip_predicted_values: True
entropy_loss_scale: 0.0
value_loss_scale: 1.0
kl_threshold: 0
rewards_shaper_scale: 0.01
# logging and checkpoint
experiment:
directory: "ant"
experiment_name: ""
write_interval: 40
checkpoint_interval: 400
# Sequential trainer
# https://skrl.readthedocs.io/en/latest/modules/skrl.trainers.sequential.html
trainer:
timesteps: 8000
seed: 42
# Models are instantiated using skrl's model instantiator utility
# https://skrl.readthedocs.io/en/develop/modules/skrl.utils.model_instantiators.html
models:
separate: False
policy: # see skrl.utils.model_instantiators.gaussian_model for parameter details
clip_actions: False
clip_log_std: True
min_log_std: -20.0
max_log_std: 2.0
input_shape: "Shape.STATES"
hiddens: [128, 128, 128]
hidden_activation: ["elu", "elu", "elu"]
output_shape: "Shape.ACTIONS"
output_activation: "tanh"
output_scale: 1.0
value: # see skrl.utils.model_instantiators.deterministic_model for parameter details
clip_actions: False
input_shape: "Shape.STATES"
hiddens: [128, 128, 128]
hidden_activation: ["elu", "elu", "elu"]
output_shape: "Shape.ONE"
output_activation: ""
output_scale: 1.0
# PPO agent configuration (field names are from PPO_DEFAULT_CONFIG)
# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ppo.html
agent:
rollouts: 24
learning_epochs: 5
mini_batches: 4
discount_factor: 0.99
lambda: 0.95
learning_rate: 1.e-3
learning_rate_scheduler: "KLAdaptiveRL"
learning_rate_scheduler_kwargs:
kl_threshold: 0.01
state_preprocessor: "RunningStandardScaler"
state_preprocessor_kwargs: null
value_preprocessor: "RunningStandardScaler"
value_preprocessor_kwargs: null
random_timesteps: 0
learning_starts: 0
grad_norm_clip: 1.0
ratio_clip: 0.2
value_clip: 0.2
clip_predicted_values: True
entropy_loss_scale: 0.0
value_loss_scale: 1.0
kl_threshold: 0
rewards_shaper_scale: 1.0
# logging and checkpoint
experiment:
directory: "anymal"
experiment_name: ""
write_interval: 60
checkpoint_interval: 600
# Sequential trainer
# https://skrl.readthedocs.io/en/latest/modules/skrl.trainers.sequential.html
trainer:
timesteps: 12000
seed: 42
# Models are instantiated using skrl's model instantiator utility
# https://skrl.readthedocs.io/en/develop/modules/skrl.utils.model_instantiators.html
models:
separate: False
policy: # see skrl.utils.model_instantiators.gaussian_model for parameter details
clip_actions: True
clip_log_std: True
min_log_std: -20.0
max_log_std: 2.0
input_shape: "Shape.STATES"
hiddens: [32, 32]
hidden_activation: ["elu", "elu"]
output_shape: "Shape.ACTIONS"
output_activation: "tanh"
output_scale: 1.0
value: # see skrl.utils.model_instantiators.deterministic_model for parameter details
clip_actions: False
input_shape: "Shape.STATES"
hiddens: [32, 32]
hidden_activation: ["elu", "elu"]
output_shape: "Shape.ONE"
output_activation: ""
output_scale: 1.0
# PPO agent configuration (field names are from PPO_DEFAULT_CONFIG)
# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ppo.html
agent:
rollouts: 16
learning_epochs: 8
mini_batches: 1
discount_factor: 0.99
lambda: 0.95
learning_rate: 3.e-4
learning_rate_scheduler: "KLAdaptiveRL"
learning_rate_scheduler_kwargs:
kl_threshold: 0.008
state_preprocessor: "RunningStandardScaler"
state_preprocessor_kwargs: null
value_preprocessor: "RunningStandardScaler"
value_preprocessor_kwargs: null
random_timesteps: 0
learning_starts: 0
grad_norm_clip: 1.0
ratio_clip: 0.2
value_clip: 0.2
clip_predicted_values: True
entropy_loss_scale: 0.0
value_loss_scale: 2.0
kl_threshold: 0
rewards_shaper_scale: 1.0
# logging and checkpoint
experiment:
directory: "cartpole"
experiment_name: ""
write_interval: 16
checkpoint_interval: 80
# Sequential trainer
# https://skrl.readthedocs.io/en/latest/modules/skrl.trainers.sequential.html
trainer:
timesteps: 1600
seed: 42
# Models are instantiated using skrl's model instantiator utility
# https://skrl.readthedocs.io/en/develop/modules/skrl.utils.model_instantiators.html
models:
separate: False
policy: # see skrl.utils.model_instantiators.gaussian_model for parameter details
clip_actions: True
clip_log_std: True
min_log_std: -20.0
max_log_std: 2.0
input_shape: "Shape.STATES"
hiddens: [400, 200, 100]
hidden_activation: ["elu", "elu", "elu"]
output_shape: "Shape.ACTIONS"
output_activation: "tanh"
output_scale: 1.0
value: # see skrl.utils.model_instantiators.deterministic_model for parameter details
clip_actions: False
input_shape: "Shape.STATES"
hiddens: [400, 200, 100]
hidden_activation: ["elu", "elu", "elu"]
output_shape: "Shape.ONE"
output_activation: ""
output_scale: 1.0
# PPO agent configuration (field names are from PPO_DEFAULT_CONFIG)
# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ppo.html
agent:
rollouts: 32
learning_epochs: 8
mini_batches: 8
discount_factor: 0.99
lambda: 0.95
learning_rate: 3.e-4
learning_rate_scheduler: "KLAdaptiveRL"
learning_rate_scheduler_kwargs:
kl_threshold: 0.008
state_preprocessor: "RunningStandardScaler"
state_preprocessor_kwargs: null
value_preprocessor: "RunningStandardScaler"
value_preprocessor_kwargs: null
random_timesteps: 0
learning_starts: 0
grad_norm_clip: 1.0
ratio_clip: 0.2
value_clip: 0.2
clip_predicted_values: True
entropy_loss_scale: 0.0
value_loss_scale: 1.0
kl_threshold: 0
rewards_shaper_scale: 0.01
# logging and checkpoint
experiment:
directory: "humanoid"
experiment_name: ""
write_interval: 80
checkpoint_interval: 800
# Sequential trainer
# https://skrl.readthedocs.io/en/latest/modules/skrl.trainers.sequential.html
trainer:
timesteps: 16000
seed: 42
# Models are instantiated using skrl's model instantiator utility
# https://skrl.readthedocs.io/en/develop/modules/skrl.utils.model_instantiators.html
models:
separate: True
policy: # see skrl.utils.model_instantiators.gaussian_model for parameter details
clip_actions: False
clip_log_std: True
min_log_std: -20.0
max_log_std: 2.0
input_shape: "Shape.STATES"
hiddens: [256, 128, 64]
hidden_activation: ["elu", "elu", "elu"]
output_shape: "Shape.ACTIONS"
output_activation: ""
output_scale: 1.0
value: # see skrl.utils.model_instantiators.deterministic_model for parameter details
clip_actions: False
input_shape: "Shape.STATES"
hiddens: [256, 128, 64]
hidden_activation: ["elu", "elu", "elu"]
output_shape: "Shape.ONE"
output_activation: ""
output_scale: 1.0
# PPO agent configuration (field names are from PPO_DEFAULT_CONFIG)
# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ppo.html
agent:
rollouts: 16
learning_epochs: 8
mini_batches: 8
discount_factor: 0.99
lambda: 0.95
learning_rate: 3.e-4
learning_rate_scheduler: "KLAdaptiveRL"
learning_rate_scheduler_kwargs:
kl_threshold: 0.008
state_preprocessor: "RunningStandardScaler"
state_preprocessor_kwargs: null
value_preprocessor: "RunningStandardScaler"
value_preprocessor_kwargs: null
random_timesteps: 0
learning_starts: 0
grad_norm_clip: 1.0
ratio_clip: 0.2
value_clip: 0.2
clip_predicted_values: True
entropy_loss_scale: 0.0
value_loss_scale: 2.0
kl_threshold: 0
rewards_shaper_scale: 0.01
# logging and checkpoint
experiment:
directory: "lift"
experiment_name: ""
write_interval: 120
checkpoint_interval: 1200
# Sequential trainer
# https://skrl.readthedocs.io/en/latest/modules/skrl.trainers.sequential.html
trainer:
timesteps: 24000
seed: 42
# Models are instantiated using skrl's model instantiator utility
# https://skrl.readthedocs.io/en/develop/modules/skrl.utils.model_instantiators.html
models:
separate: False
policy: # see skrl.utils.model_instantiators.gaussian_model for parameter details
clip_actions: False
clip_log_std: True
min_log_std: -20.0
max_log_std: 2.0
input_shape: "Shape.STATES"
hiddens: [256, 128, 64]
hidden_activation: ["elu", "elu", "elu"]
output_shape: "Shape.ACTIONS"
output_activation: "tanh"
output_scale: 1.0
value: # see skrl.utils.model_instantiators.deterministic_model for parameter details
clip_actions: False
input_shape: "Shape.STATES"
hiddens: [256, 128, 64]
hidden_activation: ["elu", "elu", "elu"]
output_shape: "Shape.ONE"
output_activation: ""
output_scale: 1.0
# PPO agent configuration (field names are from PPO_DEFAULT_CONFIG)
# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ppo.html
agent:
rollouts: 16
learning_epochs: 8
mini_batches: 8
discount_factor: 0.99
lambda: 0.95
learning_rate: 3.e-4
learning_rate_scheduler: "KLAdaptiveRL"
learning_rate_scheduler_kwargs:
kl_threshold: 0.01
state_preprocessor: "RunningStandardScaler"
state_preprocessor_kwargs: null
value_preprocessor: "RunningStandardScaler"
value_preprocessor_kwargs: null
random_timesteps: 0
learning_starts: 0
grad_norm_clip: 1.0
ratio_clip: 0.2
value_clip: 0.2
clip_predicted_values: True
entropy_loss_scale: 0.0
value_loss_scale: 2.0
kl_threshold: 0
rewards_shaper_scale: 1.0
# logging and checkpoint
experiment:
directory: "reach"
experiment_name: ""
write_interval: 40
checkpoint_interval: 400
# Sequential trainer
# https://skrl.readthedocs.io/en/latest/modules/skrl.trainers.sequential.html
trainer:
timesteps: 16000
Changelog
---------
0.2.0 (2023-01-25)
~~~~~~~~~~~~~~~~~~
Added
^^^^^
* Environment wrapper for the skrl RL library
* Training/evaluation configuration files for the skrl RL library
0.1.2 (2023-01-19)
~~~~~~~~~~~~~~~~~~
......
......@@ -359,7 +359,7 @@ class IsaacEnv(gym.Env):
This handling must be taken care of by the wrapper around the :class:`IsaacEnv` instance.
Note:
For included frameworks (RSL-RL, RL-Games), the observations must have the key "policy". In case,
For included frameworks (RSL-RL, RL-Games, skrl), the observations must have the key "policy". In case,
the key "critic" is also present, then the critic observations are taken from the "critic" group.
Otherwise, they are the same as the "policy" group.
......
......@@ -10,5 +10,6 @@ Currently the following are supported:
* Stable-Baselines3: https://github.com/DLR-RM/stable-baselines3
* RL-Games: https://github.com/Denys88/rl_games
* RSL-RL: https://github.com/leggedrobotics/rsl_rl
* skrl: https://github.com/Toni-SM/skrl
"""
"""Wrapper to configure an :class:`IsaacEnv` instance to skrl environment
The following example shows how to wrap an environment for skrl:
.. code-block:: python
from omni.isaac.orbit_envs.utils.wrappers.skrl import SkrlVecEnvWrapper
env = SkrlVecEnvWrapper(env)
Or, equivalently, by directly calling the skrl library API as follows:
.. code-block:: python
from skrl.envs.torch.wrappers import wrap_env
env = wrap_env(env, wrapper="isaac-orbit")
"""
import torch
from typing import List, Optional, Union
import tqdm
# skrl
from skrl.agents.torch import Agent
from skrl.envs.torch.wrappers import Wrapper, wrap_env
from skrl.trainers.torch import Trainer
from omni.isaac.orbit_envs.isaac_env import IsaacEnv
__all__ = ["SkrlVecEnvWrapper"]
"""
Vectorized environment wrapper.
"""
def SkrlVecEnvWrapper(env: IsaacEnv):
    """Wrap an :class:`IsaacEnv` environment for use with skrl.

    The wrapping logic for :class:`IsaacEnv` lives in the skrl library itself. This function
    only exists so that this extension exposes a skrl entry next to the wrappers of the other
    frameworks: it validates the input and then delegates to :func:`wrap_env` from the skrl
    library API with ``wrapper="isaac-orbit"``.

    Args:
        env: The environment to wrap. Its ``unwrapped`` attribute must be an
            :class:`IsaacEnv` instance.

    Returns:
        The result of ``wrap_env(env, wrapper="isaac-orbit")``.

    Raises:
        ValueError: If the unwrapped environment is not an :class:`IsaacEnv` instance.

    Reference:
        https://skrl.readthedocs.io/en/latest/modules/skrl.envs.wrapping.html
    """
    is_isaac_env = isinstance(env.unwrapped, IsaacEnv)
    if is_isaac_env:
        # input is valid: let skrl do the actual wrapping
        return wrap_env(env, wrapper="isaac-orbit")
    raise ValueError(f"The environment must be inherited from IsaacEnv. Environment type: {type(env)}")
class SkrlLogTrainer(Trainer):
    """Trainer with a custom training loop that also tracks episode information.

    Reference:
        https://skrl.readthedocs.io/en/latest/modules/skrl.trainers.base_class.html
    """

    def __init__(
        self,
        env: Wrapper,
        agents: Union[Agent, List[Agent]],
        agents_scope: Optional[List[int]] = None,
        cfg: Optional[dict] = None,
    ) -> None:
        """Set up the trainer.

        Args:
            env: Wrapped environment to train on.
            agents: Agent(s) to train.
            agents_scope: Forwarded unchanged to the base trainer.
            cfg: Trainer settings. Its entries override the defaults
                ``{"timesteps": 1000, "disable_progressbar": False}``.
        """
        trainer_cfg = {"timesteps": 1000, "disable_progressbar": False}
        if cfg is not None:
            trainer_cfg.update(cfg)
        super().__init__(env=env, agents=agents, agents_scope=agents_scope, cfg=trainer_cfg)

    def _track_episode_info(self, infos):
        """Pass single-element tensors found under ``infos["episode"]`` to the agent's data tracker."""
        if "episode" not in infos:
            return
        for name, value in infos["episode"].items():
            # only scalar-like tensors can be logged as a single value
            if not isinstance(value, torch.Tensor) or value.numel() != 1:
                continue
            self.agents.track_data(f"Info / {name}", value.item())

    def train(self):
        """Train the agent.

        NOTE(review): ``self.agents`` is used as a single agent throughout this loop; a list of
        agents is not handled here. Presumably the base class unwraps a single agent - confirm.
        """
        # init agent
        self.agents.init(trainer_cfg=self.cfg)
        self.agents.set_running_mode("train")

        # reset env
        states, infos = self.env.reset()

        # training loop
        progress = tqdm.tqdm(range(self.timesteps), disable=self.disable_progressbar)
        for timestep in progress:
            step_kwargs = {"timestep": timestep, "timesteps": self.timesteps}

            # pre-interaction
            self.agents.pre_interaction(**step_kwargs)

            # compute actions (the first element of the agent's output)
            with torch.no_grad():
                outputs = self.agents.act(states, **step_kwargs)
                actions = outputs[0]

            # step the environments
            next_states, rewards, terminated, truncated, infos = self.env.step(actions)

            # record the environments' transitions
            with torch.no_grad():
                self.agents.record_transition(
                    states=states,
                    actions=actions,
                    rewards=rewards,
                    next_states=next_states,
                    terminated=terminated,
                    truncated=truncated,
                    infos=infos,
                    **step_kwargs,
                )

            # log custom environment data
            self._track_episode_info(infos)

            # post-interaction
            self.agents.post_interaction(**step_kwargs)

            # update states in place
            # NOTE(review): the environment is never reset inside the loop; presumably the
            # wrapped environment resets finished sub-environments itself - confirm.
            states.copy_(next_states)
......@@ -32,6 +32,7 @@ INSTALL_REQUIRES = [
# Extra dependencies for RL agents
EXTRAS_REQUIRE = {
"sb3": ["stable-baselines3>=1.5.0", "tensorboard"],
"skrl": ["skrl>=0.10.0"],
"rl_games": ["rl-games==1.5.2"],
"rsl_rl": ["rsl_rl@git+https://github.com/leggedrobotics/rsl_rl.git"],
"robomimic": ["robomimic@git+https://github.com/ARISE-Initiative/robomimic.git"],
......
import os
import yaml
from skrl.resources.preprocessors.torch import RunningStandardScaler # noqa: F401
from skrl.resources.schedulers.torch import KLAdaptiveRL # noqa: F401
from skrl.utils.model_instantiators import Shape # noqa: F401
from omni.isaac.orbit_envs import ORBIT_ENVS_DATA_DIR
__all__ = ["SKRL_PPO_CONFIG_FILE", "parse_skrl_cfg"]
def _skrl_data_file(relative_path):
    """Return the path of a file located below ``ORBIT_ENVS_DATA_DIR``."""
    return os.path.join(ORBIT_ENVS_DATA_DIR, relative_path)


SKRL_PPO_CONFIG_FILE = {
    # classic
    "Isaac-Cartpole-v0": _skrl_data_file("skrl/cartpole_ppo.yaml"),
    "Isaac-Ant-v0": _skrl_data_file("skrl/ant_ppo.yaml"),
    "Isaac-Humanoid-v0": _skrl_data_file("skrl/humanoid_ppo.yaml"),
    # manipulation
    "Isaac-Lift-Franka-v0": _skrl_data_file("skrl/lift_ppo.yaml"),
    "Isaac-Reach-Franka-v0": _skrl_data_file("skrl/reach_ppo.yaml"),
    # locomotion
    "Isaac-Velocity-Anymal-C-v0": _skrl_data_file("skrl/anymal_ppo.yaml"),
}
"""Mapping from environment names to PPO agent files."""
def parse_skrl_cfg(task_name: str) -> dict:
    """Load the default skrl PPO configuration of a task.

    The task name is looked up in ``SKRL_PPO_CONFIG_FILE`` and the YAML file registered
    for it is parsed.

    Args:
        task_name: The name of the environment.

    Returns:
        A dictionary containing the parsed configuration.

    Raises:
        ValueError: If no configuration file is registered for ``task_name``.
    """
    # retrieve the default environment config file
    try:
        config_file = SKRL_PPO_CONFIG_FILE[task_name]
    except KeyError as err:
        raise ValueError(f"Task not found: {task_name}") from err

    # parse agent configuration
    with open(config_file, encoding="utf-8") as f:
        # The safe loader only builds plain YAML types (mappings, lists, scalars), which is
        # all the configuration files need, and it cannot construct arbitrary Python objects.
        cfg = yaml.safe_load(f)

    return cfg
def convert_skrl_cfg(cfg):
    """Convert simple YAML types to skrl classes/components.

    The dictionary is converted in place (nested dictionaries included) and also returned:

    * values of the keys listed in ``_direct_eval`` are evaluated with :func:`eval` when they
      are strings (e.g. ``"Shape.STATES"``); other values, such as ``None`` or an object that
      was already converted, are left untouched
    * ``None`` values of keys ending in ``_kwargs`` are replaced by an empty dictionary
    * for a ``rewards_shaper_scale`` entry, a ``rewards_shaper`` function that multiplies the
      rewards by that scale is added next to it

    Args:
        cfg (dict): configuration dictionary.

    Returns:
        dict: A dictionary containing the converted configuration (the same object as ``cfg``).
    """
    _direct_eval = [
        "learning_rate_scheduler",
        "state_preprocessor",
        "value_preprocessor",
        "input_shape",
        "output_shape",
    ]

    def reward_shaper_function(scale):
        # bind the scale in a closure; the timestep arguments are accepted but not used
        def reward_shaper(rewards, timestep, timesteps):
            return rewards * scale

        return reward_shaper

    def update_dict(d):
        # Iterate over a snapshot: the "rewards_shaper" key may be inserted below, and adding
        # a key while iterating the live view raises
        # "RuntimeError: dictionary changed size during iteration".
        for key, value in list(d.items()):
            if isinstance(value, dict):
                update_dict(value)
            elif key in _direct_eval:
                # SECURITY NOTE: eval() executes the configuration string as Python code.
                # Only use configuration files from trusted sources.
                if isinstance(value, str):
                    d[key] = eval(value)
            elif key.endswith("_kwargs"):
                d[key] = value if value is not None else {}
            elif key == "rewards_shaper_scale":
                d["rewards_shaper"] = reward_shaper_function(value)

    update_dict(cfg)

    return cfg
"""
Script to play a checkpoint of an RL agent from skrl.
Visit the skrl documentation (https://skrl.readthedocs.io) to see the examples structured in a more user-friendly way
"""
"""Launch Isaac Sim Simulator first."""
import argparse
import os
from omni.isaac.kit import SimulationApp
# add argparse arguments
# NOTE: the banner is passed as ``description``. The first positional parameter of
# ``ArgumentParser`` is ``prog``, so passing the banner positionally would replace the
# program name in the usage line.
parser = argparse.ArgumentParser(description="Welcome to Orbit: Omniverse Robotics Environments!")
parser.add_argument("--headless", action="store_true", default=False, help="Force display off at all times.")
parser.add_argument("--cpu", action="store_true", default=False, help="Use CPU pipeline.")
parser.add_argument("--num_envs", type=int, default=None, help="Number of environments to simulate.")
parser.add_argument("--task", type=str, default=None, help="Name of the task.")
parser.add_argument("--checkpoint", type=str, default=None, help="Path to model checkpoint.")
args_cli = parser.parse_args()

# launch the simulator
config = {"headless": args_cli.headless}
simulation_app = SimulationApp(config)
"""Rest everything follows."""
import gym
from skrl.agents.torch.ppo import PPO, PPO_DEFAULT_CONFIG
from skrl.utils.model_instantiators import deterministic_model, gaussian_model, shared_model
import omni.isaac.contrib_envs # noqa: F401
import omni.isaac.orbit_envs # noqa: F401
from omni.isaac.orbit_envs.utils import get_checkpoint_path, parse_env_cfg
from omni.isaac.orbit_envs.utils.wrappers.skrl import SkrlVecEnvWrapper
from config import convert_skrl_cfg, parse_skrl_cfg
def main():
    """Play with skrl agent.

    Creates the task environment, builds the models and the PPO agent from the task's skrl
    configuration, loads a checkpoint and steps the environment with the agent's actions
    until the simulation app stops running or the simulation is stopped.
    """
    # imported locally: this script does not import torch at module level
    import torch

    # parse env configuration
    env_cfg = parse_env_cfg(args_cli.task, use_gpu=not args_cli.cpu, num_envs=args_cli.num_envs)
    experiment_cfg = parse_skrl_cfg(args_cli.task)

    # create isaac environment
    env = gym.make(args_cli.task, cfg=env_cfg, headless=args_cli.headless)
    # wrap around environment for skrl
    env = SkrlVecEnvWrapper(env)  # same as: `wrap_env(env, wrapper="isaac-orbit")`

    # instantiate models using skrl model instantiator utility
    # https://skrl.readthedocs.io/en/latest/modules/skrl.utils.model_instantiators.html
    models = {}
    # non-shared models
    if experiment_cfg["models"]["separate"]:
        models["policy"] = gaussian_model(
            observation_space=env.observation_space,
            action_space=env.action_space,
            device=env.device,
            **convert_skrl_cfg(experiment_cfg["models"]["policy"]),
        )
        models["value"] = deterministic_model(
            observation_space=env.observation_space,
            action_space=env.action_space,
            device=env.device,
            **convert_skrl_cfg(experiment_cfg["models"]["value"]),
        )
    # shared models
    else:
        models["policy"] = shared_model(
            observation_space=env.observation_space,
            action_space=env.action_space,
            device=env.device,
            structure=None,
            roles=["policy", "value"],
            parameters=[
                convert_skrl_cfg(experiment_cfg["models"]["policy"]),
                convert_skrl_cfg(experiment_cfg["models"]["value"]),
            ],
        )
        models["value"] = models["policy"]

    # configure and instantiate PPO agent
    # https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ppo.html
    agent_cfg = PPO_DEFAULT_CONFIG.copy()
    # convert_skrl_cfg adds this key while walking the dictionary; creating it beforehand
    # avoids 'dictionary changed size during iteration'
    experiment_cfg["agent"]["rewards_shaper"] = None
    agent_cfg.update(convert_skrl_cfg(experiment_cfg["agent"]))

    agent_cfg["state_preprocessor_kwargs"].update({"size": env.observation_space, "device": env.device})
    agent_cfg["value_preprocessor_kwargs"].update({"size": 1, "device": env.device})
    agent_cfg["experiment"]["write_interval"] = 0  # don't log to Tensorboard
    agent_cfg["experiment"]["checkpoint_interval"] = 0  # don't generate checkpoints

    agent = PPO(
        models=models,
        memory=None,  # memory is optional during evaluation
        cfg=agent_cfg,
        observation_space=env.observation_space,
        action_space=env.action_space,
        device=env.device,
    )

    # specify directory for logging experiments (load checkpoint)
    log_root_path = os.path.join("logs", "skrl", experiment_cfg["agent"]["experiment"]["directory"])
    log_root_path = os.path.abspath(log_root_path)
    print(f"[INFO] Loading experiment from directory: {log_root_path}")
    # get checkpoint path: an explicit --checkpoint takes precedence over the search in the log directory
    if args_cli.checkpoint:
        resume_path = os.path.abspath(args_cli.checkpoint)
    else:
        resume_path = get_checkpoint_path(log_root_path, os.path.join("*", "checkpoints"), None)
    print(f"[INFO] Loading model checkpoint from: {resume_path}")

    # initialize agent and load the checkpoint
    agent.init()
    agent.load(resume_path)
    # this script only evaluates: switch the agent's models to evaluation mode
    agent.set_running_mode("eval")

    # reset environment
    obs, _ = env.reset()
    # simulate environment
    while simulation_app.is_running():
        # agent stepping (no gradients are needed for inference)
        with torch.no_grad():
            actions = agent.act(obs, timestep=0, timesteps=0)[0]
        # env stepping
        obs, _, _, _, _ = env.step(actions)
        # check if simulator is stopped
        if env.sim.is_stopped():
            break

    # close the simulator
    env.close()
    simulation_app.close()


if __name__ == "__main__":
    main()
"""
Script to train RL agent with skrl.
Visit the skrl documentation (https://skrl.readthedocs.io) to see the examples structured in a more user-friendly way
"""
"""Launch Isaac Sim Simulator first."""
import argparse
import os
from omni.isaac.kit import SimulationApp
# add argparse arguments
# NOTE: the banner is passed as ``description``. The first positional parameter of
# ``ArgumentParser`` is ``prog``, so passing the banner positionally would replace the
# program name in the usage line.
parser = argparse.ArgumentParser(description="Welcome to Orbit: Omniverse Robotics Environments!")
parser.add_argument("--headless", action="store_true", default=False, help="Force display off at all times.")
parser.add_argument("--cpu", action="store_true", default=False, help="Use CPU pipeline.")
parser.add_argument("--num_envs", type=int, default=None, help="Number of environments to simulate.")
parser.add_argument("--task", type=str, default=None, help="Name of the task.")
parser.add_argument("--seed", type=int, default=None, help="Seed used for the environment")
args_cli = parser.parse_args()

# launch the simulator
config = {"headless": args_cli.headless}
# load cheaper kit config in headless
# (the experience files are looked up in the directory given by the EXP_PATH environment variable)
if args_cli.headless:
    app_experience = f"{os.environ['EXP_PATH']}/omni.isaac.sim.python.gym.headless.kit"
else:
    app_experience = f"{os.environ['EXP_PATH']}/omni.isaac.sim.python.kit"

# launch the simulator
simulation_app = SimulationApp(config, experience=app_experience)
"""Rest everything follows."""
import gym
from datetime import datetime
from skrl.agents.torch.ppo import PPO, PPO_DEFAULT_CONFIG
from skrl.memories.torch import RandomMemory
from skrl.utils import set_seed
from skrl.utils.model_instantiators import deterministic_model, gaussian_model, shared_model
from omni.isaac.orbit.utils.io import dump_pickle, dump_yaml
import omni.isaac.contrib_envs # noqa: F401
import omni.isaac.orbit_envs # noqa: F401
from omni.isaac.orbit_envs.utils import parse_env_cfg
from omni.isaac.orbit_envs.utils.wrappers.skrl import SkrlLogTrainer, SkrlVecEnvWrapper
from config import convert_skrl_cfg, parse_skrl_cfg
def _instantiate_models(env, models_cfg):
    """Create the policy and value models with skrl's model instantiator utility.

    https://skrl.readthedocs.io/en/latest/modules/skrl.utils.model_instantiators.html

    Args:
        env: The wrapped environment; provides the spaces and the device.
        models_cfg: The ``models`` section of the experiment configuration.

    Returns:
        A dictionary with the keys ``"policy"`` and ``"value"``. Both keys refer to the same
        model instance unless ``models_cfg["separate"]`` is set.
    """
    # shared models
    if not models_cfg["separate"]:
        shared = shared_model(
            observation_space=env.observation_space,
            action_space=env.action_space,
            device=env.device,
            structure=None,
            roles=["policy", "value"],
            parameters=[
                convert_skrl_cfg(models_cfg["policy"]),
                convert_skrl_cfg(models_cfg["value"]),
            ],
        )
        return {"policy": shared, "value": shared}

    # non-shared models
    policy = gaussian_model(
        observation_space=env.observation_space,
        action_space=env.action_space,
        device=env.device,
        **convert_skrl_cfg(models_cfg["policy"]),
    )
    value = deterministic_model(
        observation_space=env.observation_space,
        action_space=env.action_space,
        device=env.device,
        **convert_skrl_cfg(models_cfg["value"]),
    )
    return {"policy": policy, "value": value}


def main():
    """Train with skrl agent."""
    # parse configuration
    env_cfg = parse_env_cfg(args_cli.task, use_gpu=not args_cli.cpu, num_envs=args_cli.num_envs)
    experiment_cfg = parse_skrl_cfg(args_cli.task)
    logging_cfg = experiment_cfg["agent"]["experiment"]

    # root directory for the experiments of this task
    log_root_path = os.path.abspath(os.path.join("logs", "skrl", logging_cfg["directory"]))
    print(f"[INFO] Logging experiment in directory: {log_root_path}")

    # directory name of this run: timestamp, optionally followed by the experiment name
    run_name = datetime.now().strftime("%b%d_%H-%M-%S")
    if logging_cfg["experiment_name"]:
        run_name += f'_{logging_cfg["experiment_name"]}'

    # set directory into agent config
    logging_cfg["directory"] = log_root_path
    logging_cfg["experiment_name"] = run_name
    log_dir = os.path.join(log_root_path, run_name)

    # dump the configuration into log-directory
    # (done before convert_skrl_cfg replaces the plain YAML values in experiment_cfg)
    params_dir = os.path.join(log_dir, "params")
    dump_yaml(os.path.join(params_dir, "env.yaml"), env_cfg)
    dump_yaml(os.path.join(params_dir, "agent.yaml"), experiment_cfg)
    dump_pickle(os.path.join(params_dir, "env.pkl"), env_cfg)
    dump_pickle(os.path.join(params_dir, "agent.pkl"), experiment_cfg)

    # create isaac environment and wrap it for skrl
    env = SkrlVecEnvWrapper(  # same as: `wrap_env(env, wrapper="isaac-orbit")`
        gym.make(args_cli.task, cfg=env_cfg, headless=args_cli.headless)
    )

    # set seed for the experiment (the command line value overrides the configuration file)
    seed = args_cli.seed if args_cli.seed is not None else experiment_cfg["seed"]
    set_seed(seed)

    # policy and value models
    models = _instantiate_models(env, experiment_cfg["models"])

    # instantiate a RandomMemory as rollout buffer (any memory can be used for this)
    # https://skrl.readthedocs.io/en/latest/modules/skrl.memories.random.html
    # the memory size is the agent's number of rollouts
    memory = RandomMemory(
        memory_size=experiment_cfg["agent"]["rollouts"],
        num_envs=env.num_envs,
        device=env.device,
    )

    # configure and instantiate PPO agent
    # https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ppo.html
    agent_cfg = PPO_DEFAULT_CONFIG.copy()
    # convert_skrl_cfg adds this key while walking the dictionary; creating it beforehand
    # avoids 'dictionary changed size during iteration'
    experiment_cfg["agent"]["rewards_shaper"] = None
    agent_cfg.update(convert_skrl_cfg(experiment_cfg["agent"]))

    device_kwargs = {"device": env.device}
    agent_cfg["state_preprocessor_kwargs"].update({"size": env.observation_space, **device_kwargs})
    agent_cfg["value_preprocessor_kwargs"].update({"size": 1, **device_kwargs})

    agent = PPO(
        models=models,
        memory=memory,
        cfg=agent_cfg,
        observation_space=env.observation_space,
        action_space=env.action_space,
        device=env.device,
    )

    # configure and instantiate a custom RL trainer for logging episode events
    # https://skrl.readthedocs.io/en/latest/modules/skrl.trainers.base_class.html
    trainer = SkrlLogTrainer(cfg=experiment_cfg["trainer"], env=env, agents=agent)

    # train the agent
    trainer.train()

    # close the simulator
    env.close()
    simulation_app.close()


if __name__ == "__main__":
    main()
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment