Unverified Commit e7d43de3 authored by Mayank Mittal's avatar Mayank Mittal Committed by GitHub

Reorganizes configuration files for locomotion velocity environment (#220)

# Description

This MR does the following:

* Changes the way environment and agent configs are handled. These are
now moved to their respective `env` folder.
* Replaces YAML file with configclasses for rsl_rl library
* Adds examples of other robots for locomotion-velocity task: anymal-b,
unitree-a1, and anymal-d (placeholder)

The new environment folder structure is as follows:

```
my_task (folder)
   |- mdp  (folder where all user's task-specific terms are stored)
   |- config
       |- robot_name_1  (folder)
            |- robot_specific_env_cfg.py
            | agents (folder)
                |- rsl_rl_cfg.py
                |- sb3_cfg.py
                |- ....
            |- __init__.py (register robot-specific tasks)
       |- robot_name_2 (folder)  
   |- my_task_env_cfg.py (base env config)
   |- my_task_env.py (base env -- optional -- should normally use RLEnv)
```

Fixes #142 

## Type of change

- New feature (non-breaking change which adds functionality)
- Breaking change (fix or feature that would cause existing
functionality to not work as expected)

## Checklist

- [x] I have run the [`pre-commit` checks](https://pre-commit.com/) with
`./orbit.sh --format`
- [ ] I have made corresponding changes to the documentation
- [x] My changes generate no new warnings
- [ ] I have added tests that prove my fix is effective or that my
feature works
- [x] I have updated the changelog and the corresponding version in the
extension's `config/extension.toml` file

---------
Co-authored-by: 's avatarNikita Rudin <nrudin@nvidia.com>
parent a9b5df11
[package] [package]
# Note: Semantic Versioning is used: https://semver.org/ # Note: Semantic Versioning is used: https://semver.org/
version = "0.9.29" version = "0.9.30"
# Description # Description
title = "ORBIT framework for Robot Learning" title = "ORBIT framework for Robot Learning"
......
Changelog Changelog
--------- ---------
0.9.30 (2023-11-01)
~~~~~~~~~~~~~~~~~~~
Fixed
^^^^^
* Added skipping of None values in the :class:`InteractiveScene` class when creating the scene from configuration
objects. Earlier, it was throwing an error when the user passed a None value for a scene element.
* Added ``kwargs`` to the :class:`RLEnv` class to allow passing additional arguments from gym registry function.
This is now needed since the registry function passes args beyond the ones specified in the constructor.
0.9.29 (2023-11-01) 0.9.29 (2023-11-01)
~~~~~~~~~~~~~~~~~~~ ~~~~~~~~~~~~~~~~~~~
......
...@@ -28,7 +28,7 @@ from ..articulation import ArticulationCfg ...@@ -28,7 +28,7 @@ from ..articulation import ArticulationCfg
UNITREE_A1_CFG = ArticulationCfg( UNITREE_A1_CFG = ArticulationCfg(
spawn=sim_utils.UsdFileCfg( spawn=sim_utils.UsdFileCfg(
usd_path=f"{ISAAC_ORBIT_NUCLEUS_DIR}/Robots/Unitree/A1/a1_instanceable.usd", usd_path=f"{ISAAC_ORBIT_NUCLEUS_DIR}/Robots/Unitree/A1/a1_instanceable.usd",
activate_contact_sensors=False, activate_contact_sensors=True,
rigid_props=sim_utils.RigidBodyPropertiesCfg( rigid_props=sim_utils.RigidBodyPropertiesCfg(
disable_gravity=False, disable_gravity=False,
retain_accelerations=False, retain_accelerations=False,
...@@ -49,10 +49,11 @@ UNITREE_A1_CFG = ArticulationCfg( ...@@ -49,10 +49,11 @@ UNITREE_A1_CFG = ArticulationCfg(
".*R_hip_joint": -0.1, ".*R_hip_joint": -0.1,
"F[L,R]_thigh_joint": 0.8, "F[L,R]_thigh_joint": 0.8,
"R[L,R]_thigh_joint": 1.0, "R[L,R]_thigh_joint": 1.0,
".*_calf_joint": -1.8, ".*_calf_joint": -1.5,
}, },
joint_vel={".*": 0.0}, joint_vel={".*": 0.0},
), ),
soft_joint_pos_limit_factor=0.9,
actuators={ actuators={
"base_legs": DCMotorCfg( "base_legs": DCMotorCfg(
joint_names_expr=[".*"], joint_names_expr=[".*"],
......
...@@ -78,7 +78,7 @@ class RLEnv(BaseEnv, gym.Env): ...@@ -78,7 +78,7 @@ class RLEnv(BaseEnv, gym.Env):
cfg: RLEnvCfg cfg: RLEnvCfg
"""Configuration for the environment.""" """Configuration for the environment."""
def __init__(self, cfg: RLEnvCfg): def __init__(self, cfg: RLEnvCfg, **kwargs):
# initialize the base class to setup the scene. # initialize the base class to setup the scene.
super().__init__(cfg=cfg) super().__init__(cfg=cfg)
......
...@@ -327,7 +327,7 @@ class InteractiveScene: ...@@ -327,7 +327,7 @@ class InteractiveScene:
for asset_name, asset_cfg in self.cfg.__dict__.items(): for asset_name, asset_cfg in self.cfg.__dict__.items():
# skip keywords # skip keywords
# note: easier than writing a list of keywords: [num_envs, env_spacing, lazy_sensor_update] # note: easier than writing a list of keywords: [num_envs, env_spacing, lazy_sensor_update]
if asset_name in InteractiveSceneCfg.__dataclass_fields__: if asset_name in InteractiveSceneCfg.__dataclass_fields__ or asset_cfg is None:
continue continue
# resolve regex # resolve regex
asset_cfg.prim_path = asset_cfg.prim_path.format(ENV_REGEX_NS=self.env_regex_ns) asset_cfg.prim_path = asset_cfg.prim_path.format(ENV_REGEX_NS=self.env_regex_ns)
......
[package] [package]
# Note: Semantic Versioning is used: https://semver.org/ # Note: Semantic Versioning is used: https://semver.org/
version = "0.4.3" version = "0.5.0"
# Description # Description
title = "ORBIT Environments" title = "ORBIT Environments"
......
params:
seed: 12344
# environment wrapper clipping
env:
clip_observations: 5.0
clip_actions: 1.0
algo:
name: sac
model:
name: soft_actor_critic
network:
name: soft_actor_critic
separate: True
space:
continuous:
mu_activation: None
sigma_activation: None
mu_init:
name: default
sigma_init:
name: const_initializer
val: 0
fixed_sigma: True
mlp:
units: [32, 32]
activation: elu
d2rl: False
initializer:
name: default
regularizer:
name: None
log_std_bounds: [-5, 2]
load_checkpoint: False # flag which sets whether to load the checkpoint
load_path: '' # path to the checkpoint to load
config:
name: cartpole
env_name: rlgpu
device: 'cuda:0'
device_name: 'cuda:0'
multi_gpu: False
# ppo: True
mixed_precision: False
normalize_input: True
normalize_value: True
num_actors: -1
reward_shaper:
scale_value: 0.1
normalize_advantage: True
gamma: 0.99
tau: 0.95
learning_rate: 3e-4
lr_schedule: adaptive
kl_threshold: 0.008
score_to_win: 20000
max_epochs: 65
save_best_after: 50
save_frequency: 25
grad_norm: 1.0
entropy_coef: 0.0
truncate_grads: True
e_clip: 0.2
horizon_length: 16
batch_size: 8192
mini_epochs: 8
critic_coef: 4
clip_value: True
seq_len: 4
bounds_loss_coef: 0.0001
num_seed_steps: 5
num_steps_per_episode: 16
critic_tau: 0.005
init_alpha: 1
learnable_temperature: True
replay_buffer_size: 1000000
actor_lr: 0.0005
critic_lr: 0.0005
alpha_lr: 0.005
# seed for the experiment
seed: 42
# device for the rl-agent
device: 'cuda:0'
policy:
init_noise_std: 1.0
actor_hidden_dims: [512, 256, 128]
critic_hidden_dims: [512, 256, 128]
activation: "elu" # can be elu, relu, selu, crelu, lrelu, tanh, sigmoid
# only required when "policy_class_name" is'ActorCriticRecurrent':
# rnn_type: 'lstm'
# rnn_hidden_size: 512
# rnn_num_layers: 1
algorithm:
# training params
value_loss_coef: 1.0
use_clipped_value_loss: True
clip_param: 0.2
entropy_coef: 0.005
num_learning_epochs: 5
num_mini_batches: 4 # mini batch size: num_envs * nsteps / nminibatches
learning_rate: 1.0e-3 # 5.e-4
schedule: "adaptive" # adaptive, fixed
gamma: 0.99
lam: 0.95
desired_kl: 0.01
max_grad_norm: 1.0
runner:
policy_class_name: "ActorCritic"
algorithm_class_name: "PPO"
num_steps_per_env: 24 # per iteration
max_iterations: 1500 # number of policy updates
empirical_normalization: False
# logging
save_interval: 50 # check for potential saves every this many iterations
experiment_name: "anymal_c"
run_name: ""
# load and resume
resume: False
load_run: -1 # -1: last run
checkpoint: -1 # -1: last saved model
resume_path: None # updated from load_run and chkpt
# seed for the experiment
seed: 67854
# device for the rl-agent
device: 'cuda:0'
policy:
init_noise_std: 1.0
actor_hidden_dims: [256, 128, 64]
critic_hidden_dims: [256, 128, 64]
activation: "elu" # can be elu, relu, selu, crelu, lrelu, tanh, sigmoid
# only required when "policy_class_name" is'ActorCriticRecurrent':
# rnn_type: 'lstm'
# rnn_hidden_size: 512
# rnn_num_layers: 1
algorithm:
# training params
value_loss_coef: 1.0
use_clipped_value_loss: True
# Seems to be the same as clip_range
clip_param: 0.2
# Seems to be the same as ent_coef
entropy_coef: 0.00
# ? seems to be the same as n_epochs
num_learning_epochs: 8
# seems to be the same as batch_size
num_mini_batches: 8 # mini batch size: num_envs * nsteps / nminibatches
# seems to be same as learning_ratena
learning_rate: 5.0e-4 # 5.e-4
schedule: "adaptive" # adaptive, fixed
# same as gamma
gamma: 0.99
# seems to be the same as gae_lambda
lam: 0.95
# seems to be the same as target_kl
desired_kl: 0.008
# seems to be the same as max_grad_norm
max_grad_norm: 1.0
runner:
policy_class_name: "ActorCritic"
algorithm_class_name: "PPO"
num_steps_per_env: 64 # per iteration
max_iterations: 200 # number of policy updates
# logging
save_interval: 50 # check for potential saves every this many iterations
experiment_name: "cabinet"
run_name: ""
# load and resume
resume: False
load_run: -1 # -1: last run
checkpoint: -1 # -1: last saved model
resume_path: None # updated from load_run and chkpt
# seed for the experiment
seed: 42
# device for the rl-agent
device: 'cuda:0'
policy:
init_noise_std: 1.0
actor_hidden_dims: [256, 128, 64]
critic_hidden_dims: [256, 128, 64]
activation: "elu" # can be elu, relu, selu, crelu, lrelu, tanh, sigmoid
# only required when "policy_class_name" is'ActorCriticRecurrent':
# rnn_type: 'lstm'
# rnn_hidden_size: 512
# rnn_num_layers: 1
algorithm:
# training params
value_loss_coef: 1.0
use_clipped_value_loss: True
clip_param: 0.2
entropy_coef: 0.01
num_learning_epochs: 8
num_mini_batches: 4 # mini batch size: num_envs * nsteps / nminibatches
learning_rate: 1.0e-3 # 5.e-4
schedule: "adaptive" # adaptive, fixed
gamma: 0.99
lam: 0.95
desired_kl: 0.01
max_grad_norm: 1.0
runner:
policy_class_name: "ActorCritic"
algorithm_class_name: "PPO"
num_steps_per_env: 16 # per iteration
max_iterations: 500 # number of policy updates
# logging
save_interval: 50 # check for potential saves every this many iterations
experiment_name: "cartpole"
run_name: ""
# load and resume
resume: False
load_run: -1 # -1: last run
checkpoint: -1 # -1: last saved model
resume_path: None # updated from load_run and chkpt
# seed for the experiment
seed: 42
# device for the rl-agent
device: 'cuda:0'
policy:
init_noise_std: 1.0
actor_hidden_dims: [256, 128, 64]
critic_hidden_dims: [256, 128, 64]
activation: "elu" # can be elu, relu, selu, crelu, lrelu, tanh, sigmoid
# only required when "policy_class_name" is'ActorCriticRecurrent':
# rnn_type: 'lstm'
# rnn_hidden_size: 512
# rnn_num_layers: 1
algorithm:
# training params
value_loss_coef: 1.0
use_clipped_value_loss: True
clip_param: 0.2
entropy_coef: 0.01
num_learning_epochs: 5
num_mini_batches: 4 # mini batch size: num_envs * nsteps / nminibatches
learning_rate: 1.0e-3 # 5.e-4
schedule: "adaptive" # adaptive, fixed
gamma: 0.99
lam: 0.95
desired_kl: 0.01
max_grad_norm: 1.0
runner:
policy_class_name: "ActorCritic"
algorithm_class_name: "PPO"
num_steps_per_env: 96 # per iteration
max_iterations: 700 # number of policy updates
# logging
save_interval: 50 # check for potential saves every this many iterations
experiment_name: "lift"
run_name: ""
# load and resume
resume: False
load_run: -1 # -1: last run
checkpoint: -1 # -1: last saved model
resume_path: None # updated from load_run and chkpt
# seed for the experiment
seed: 42
# device for the rl-agent
device: 'cuda:0'
policy:
init_noise_std: 1.0
actor_hidden_dims: [256, 128, 64]
critic_hidden_dims: [256, 128, 64]
activation: "elu" # can be elu, relu, selu, crelu, lrelu, tanh, sigmoid
# only required when "policy_class_name" is'ActorCriticRecurrent':
# rnn_type: 'lstm'
# rnn_hidden_size: 512
# rnn_num_layers: 1
algorithm:
# training params
value_loss_coef: 1.0
use_clipped_value_loss: True
# Seems to be the same as clip_range
clip_param: 0.2
# Seems to be the same as ent_coef
entropy_coef: 0.01
# ? seems to be the same as n_epochs
num_learning_epochs: 8
# seems to be the same as batch_size
num_mini_batches: 64 # mini batch size: num_envs * nsteps / nminibatches
# seems to be same as learning_rate
learning_rate: 3.0e-4 # 5.e-4
schedule: "adaptive" # adaptive, fixed
# same as gamma
gamma: 0.99
# seems to be the same as gae_lambda
lam: 0.95
# seems to be the same as target_kl
desired_kl: 0.01
# seems to be the same as max_grad_norm
max_grad_norm: 1.0
runner:
policy_class_name: "ActorCritic"
algorithm_class_name: "PPO"
num_steps_per_env: 64 # per iteration
max_iterations: 250 # number of policy updates
# logging
save_interval: 50 # check for potential saves every this many iterations
experiment_name: "reach"
run_name: ""
# load and resume
resume: False
load_run: -1 # -1: last run
checkpoint: -1 # -1: last saved model
resume_path: None # updated from load_run and chkpt
# Reference: https://github.com/DLR-RM/rl-baselines3-zoo/blob/master/hyperparams/ppo.yml#L32
seed: 42
# 512×500×16
n_timesteps: 4096000
policy: 'MlpPolicy'
train_freq: 16
batch_size: 8192
gamma: 0.99
gradient_steps: 8
ent_coef: 0.0
learning_rate: !!float 3e-4
policy_kwargs: "dict(
log_std_init=-2,
activation_fn=nn.ELU,
net_arch=[32, 32]
)"
# Uses VecNormalize class to normalize obs
normalize_input: True
# Uses VecNormalize class to normalize rew
normalize_value: True
clip_obs: 5
Changelog Changelog
--------- ---------
0.5.0 (2023-10-30)
~~~~~~~~~~~~~~~~~~
Changed
^^^^^^^
* Changed the way agent configs are handled for environments and learning agents. Switched from yaml to configclasses.
0.4.3 (2023-09-25) 0.4.3 (2023-09-25)
~~~~~~~~~~~~~~~~~~ ~~~~~~~~~~~~~~~~~~
...@@ -12,6 +20,7 @@ Changed ...@@ -12,6 +20,7 @@ Changed
* Removed the type-hinting from docstrings to simplify maintenance of the documentation. All type-hints are * Removed the type-hinting from docstrings to simplify maintenance of the documentation. All type-hints are
now in the code itself. now in the code itself.
0.4.2 (2023-08-29) 0.4.2 (2023-08-29)
~~~~~~~~~~~~~~~~~~ ~~~~~~~~~~~~~~~~~~
......
...@@ -29,12 +29,11 @@ Usage: ...@@ -29,12 +29,11 @@ Usage:
from __future__ import annotations from __future__ import annotations
import importlib
import os import os
import pkgutil
import toml import toml
# TODO: include classics, manipulation again when updated on newest version
from . import locomotion # noqa: F401
# Conveniences to other module directories via relative paths # Conveniences to other module directories via relative paths
ORBIT_ENVS_EXT_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), "../../../")) ORBIT_ENVS_EXT_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), "../../../"))
"""Path to the extension source directory.""" """Path to the extension source directory."""
...@@ -47,3 +46,47 @@ ORBIT_ENVS_METADATA = toml.load(os.path.join(ORBIT_ENVS_EXT_DIR, "config", "exte ...@@ -47,3 +46,47 @@ ORBIT_ENVS_METADATA = toml.load(os.path.join(ORBIT_ENVS_EXT_DIR, "config", "exte
# Configure the module-level variables # Configure the module-level variables
__version__ = ORBIT_ENVS_METADATA["package"]["version"] __version__ = ORBIT_ENVS_METADATA["package"]["version"]
##
# Register Gym environments.
##
def _import_all(package_name: str, blacklist_pkgs: list[str] = None):
"""Import all sub-packages in a package recursively.
It is easier to use this function to import all sub-packages in a package recursively
than to manually import each sub-package.
It replaces the need of the following code:
.. code-block:: python
import .locomotion.velocity
import .manipulation.reach
import .manipulation.lift
Args:
package_name: The package name.
blacklist_pkgs: The list of blacklisted packages to skip. Defaults to None,
which means no packages are blacklisted.
"""
# Default blacklist
if blacklist_pkgs is None:
blacklist_pkgs = []
# Import the package
package = importlib.import_module(package_name)
# Import all Python files
for file_name, module_name, is_pkg in pkgutil.walk_packages(package.__path__, package.__name__ + "."):
# check blacklisted
if any([pkg_name in module_name for pkg_name in blacklist_pkgs]):
continue
if is_pkg:
importlib.import_module(module_name)
_import_all(module_name, blacklist_pkgs)
# The blacklist is used to prevent importing configs from sub-packages
_BLACKLIST_PKGS = ["locomotion.velocity.config.anymal_d", "classic", "manipulation", "utils"]
# Import all configs in this package
_import_all(__name__, _BLACKLIST_PKGS)
...@@ -7,6 +7,21 @@ ...@@ -7,6 +7,21 @@
Ant locomotion environment (similar to OpenAI Gym Ant-v2). Ant locomotion environment (similar to OpenAI Gym Ant-v2).
""" """
from .ant_env import AntEnv import gym
__all__ = ["AntEnv"] from . import agents
##
# Register Gym environments.
##
gym.register(
id="Isaac-Ant-v0",
entry_point="omni.isaac.orbit.envs.rl_env:RLEnv",
kwargs={
"env_cfg_entry_point": f"{__name__}:ant_env_cfg.yaml",
"rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_ppo_cfg.yaml",
"skrl_cfg_entry_point": f"{agents.__name__}:skrl_ppo_cfg.yaml",
"sb3_cfg_entry_point": f"{agents.__name__}:sb3_ppo_cfg.yaml",
},
)
...@@ -7,6 +7,22 @@ ...@@ -7,6 +7,22 @@
Cartpole balancing environment. Cartpole balancing environment.
""" """
from .cartpole_env import CartpoleEnv import gym
__all__ = ["CartpoleEnv"] from . import agents
##
# Register Gym environments.
##
gym.register(
id="Isaac-Cartpole-v0",
entry_point="omni.isaac.orbit.envs.rl_env:RLEnv",
kwargs={
"env_cfg_entry_point": f"{__name__}:ant_env_cfg.yaml",
"rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_ppo_cfg.yaml",
"rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:CARTPOLE_RSL_RL_PPO_CFG",
"skrl_cfg_entry_point": f"{agents.__name__}:skrl_ppo_cfg.yaml",
"sb3_cfg_entry_point": f"{agents.__name__}:sb3_ppo_cfg.yaml",
},
)
from omni.isaac.orbit_envs.utils.wrappers.rsl_rl import (
RslRlOnPolicyRunnerCfg,
RslRlPpoActorCriticCfg,
RslRlPpoAlgorithmCfg,
)
CARTPOLE_RSL_RL_PPO_CFG = RslRlOnPolicyRunnerCfg(
num_steps_per_env=16,
max_iterations=500,
save_interval=50,
experiment_name="cartpole",
run_name="",
resume=False,
load_run=-1,
load_checkpoint=-1,
empirical_normalization=False,
policy=RslRlPpoActorCriticCfg(
init_noise_std=1.0,
actor_hidden_dims=[256, 128, 64],
critic_hidden_dims=[256, 128, 64],
activation="elu",
),
algorithm=RslRlPpoAlgorithmCfg(
value_loss_coef=1.0,
use_clipped_value_loss=True,
clip_param=0.2,
entropy_coef=0.01,
num_learning_epochs=8,
num_mini_batches=4,
learning_rate=1.0e-3,
schedule="adaptive",
gamma=0.99,
lam=0.95,
desired_kl=0.01,
max_grad_norm=1.0,
),
)
...@@ -7,6 +7,21 @@ ...@@ -7,6 +7,21 @@
Humanoid locomotion environment (similar to OpenAI Gym Humanoid-v2). Humanoid locomotion environment (similar to OpenAI Gym Humanoid-v2).
""" """
from .humanoid_env import HumanoidEnv import gym
__all__ = ["HumanoidEnv"] from . import agents
##
# Register Gym environments.
##
gym.register(
id="Isaac-Humanoid-v0",
entry_point="omni.isaac.orbit.envs.rl_env:RLEnv",
kwargs={
"env_cfg_entry_point": f"{__name__}:ant_env_cfg.yaml",
"rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_ppo_cfg.yaml",
"skrl_cfg_entry_point": f"{agents.__name__}:skrl_ppo_cfg.yaml",
"sb3_cfg_entry_point": f"{agents.__name__}:sb3_ppo_cfg.yaml",
},
)
...@@ -3,32 +3,6 @@ ...@@ -3,32 +3,6 @@
# #
# SPDX-License-Identifier: BSD-3-Clause # SPDX-License-Identifier: BSD-3-Clause
"""Locomotion environments for legged robots. """Locomotion environments for legged robots."""
These environments are based on the `legged_gym` environments provided by Rudin et al. from .velocity import * # noqa
Reference:
https://github.com/leggedrobotics/legged_gym
"""
import gym
from .locomotion_env_cfg import LocomotionEnvRoughCfg, LocomotionEnvRoughCfg_PLAY
__all__ = ["LocomotionEnvRoughCfg", "LocomotionEnvRoughCfg_PLAY"]
##
# Register Gym environments.
##
gym.register(
id="Isaac-Velocity-Rough-Anymal-C-v0",
entry_point="omni.isaac.orbit.envs.rl_env:RLEnv",
kwargs={"cfg_entry_point": "omni.isaac.orbit_envs.locomotion:LocomotionEnvRoughCfg"},
)
gym.register(
id="Isaac-Velocity-Rough-Anymal-C-Play-v0",
entry_point="omni.isaac.orbit.envs.rl_env:RLEnv",
kwargs={"cfg_entry_point": "omni.isaac.orbit_envs.locomotion:LocomotionEnvRoughCfg_PLAY"},
)
# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES, ETH Zurich, and University of Toronto
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
"""Locomotion environments with velocity-tracking commands.
These environments are based on the `legged_gym` environments provided by Rudin et al.
Reference:
https://github.com/leggedrobotics/legged_gym
"""
# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES, ETH Zurich, and University of Toronto
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
"""Configurations for velocity-based locomotion environments."""
# We leave this file empty since we don't want to expose any configs in this package directly.
# We still need this file to import the "config" module in the parent package.
# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES, ETH Zurich, and University of Toronto
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
import gym
from . import agents, flat_env_cfg, rough_env_cfg
##
# Register Gym environments.
##
gym.register(
id="Isaac-Velocity-Flat-Anymal-B-v0",
entry_point="omni.isaac.orbit.envs.rl_env:RLEnv",
kwargs={
"env_cfg_entry_point": flat_env_cfg.AnymalBFlatEnvCfg,
"rsl_rl_cfg_entry_point": agents.rsl_rl_cfg.AnymalBFlatPPORunnerCfg,
},
)
gym.register(
id="Isaac-Velocity-Flat-Anymal-B-Play-v0",
entry_point="omni.isaac.orbit.envs.rl_env:RLEnv",
kwargs={
"env_cfg_entry_point": flat_env_cfg.AnymalBFlatEnvCfg_PLAY,
"rsl_rl_cfg_entry_point": agents.rsl_rl_cfg.AnymalBFlatPPORunnerCfg,
},
)
gym.register(
id="Isaac-Velocity-Rough-Anymal-B-v0",
entry_point="omni.isaac.orbit.envs.rl_env:RLEnv",
kwargs={
"env_cfg_entry_point": rough_env_cfg.AnymalBRoughEnvCfg,
"rsl_rl_cfg_entry_point": agents.rsl_rl_cfg.AnymalBRoughPPORunnerCfg,
},
)
gym.register(
id="Isaac-Velocity-Rough-Anymal-B-Play-v0",
entry_point="omni.isaac.orbit.envs.rl_env:RLEnv",
kwargs={
"env_cfg_entry_point": rough_env_cfg.AnymalBRoughEnvCfg_PLAY,
"rsl_rl_cfg_entry_point": agents.rsl_rl_cfg.AnymalBRoughPPORunnerCfg,
},
)
# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES, ETH Zurich, and University of Toronto
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
from . import rsl_rl_cfg # noqa: F401, F403
# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES, ETH Zurich, and University of Toronto
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
from omni.isaac.orbit.utils import configclass
from omni.isaac.orbit_envs.utils.wrappers.rsl_rl import (
RslRlOnPolicyRunnerCfg,
RslRlPpoActorCriticCfg,
RslRlPpoAlgorithmCfg,
)
@configclass
class AnymalBRoughPPORunnerCfg(RslRlOnPolicyRunnerCfg):
num_steps_per_env = 24
max_iterations = 1500
save_interval = 50
experiment_name = "anymal_b_rough"
empirical_normalization = False
policy = RslRlPpoActorCriticCfg(
init_noise_std=1.0,
actor_hidden_dims=[512, 256, 128],
critic_hidden_dims=[512, 256, 128],
activation="elu",
)
algorithm = RslRlPpoAlgorithmCfg(
value_loss_coef=1.0,
use_clipped_value_loss=True,
clip_param=0.2,
entropy_coef=0.005,
num_learning_epochs=5,
num_mini_batches=4,
learning_rate=1.0e-3,
schedule="adaptive",
gamma=0.99,
lam=0.95,
desired_kl=0.01,
max_grad_norm=1.0,
)
@configclass
class AnymalBFlatPPORunnerCfg(AnymalBRoughPPORunnerCfg):
def __post_init__(self):
super().__post_init__()
self.max_iterations = 300
self.experiment_name = "anymal_b_flat"
self.policy.actor_hidden_dims = [128, 128, 128]
self.policy.critic_hidden_dims = [128, 128, 128]
# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES, ETH Zurich, and University of Toronto
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
from omni.isaac.orbit.utils import configclass
from omni.isaac.orbit_envs.locomotion.velocity.velocity_env_cfg import LocomotionVelocityRoughEnvCfg
##
# Pre-defined configs
##
# isort: off
from omni.isaac.orbit.assets.config.anymal import ANYMAL_B_CFG
@configclass
class AnymalBFlatEnvCfg(LocomotionVelocityRoughEnvCfg):
def __post_init__(self):
# post init of parent
super().__post_init__()
# switch robot to anymal-d
self.scene.robot = ANYMAL_B_CFG.replace(prim_path="{ENV_REGEX_NS}/Robot")
# override rewards
self.rewards.flat_orientation_l2.weight = -5.0
self.rewards.dof_torques_l2.weight = -2.5e-5
self.rewards.feet_air_time.weight = 2.0
# change terrain to flat
self.scene.terrain.terrain_type = "plane"
self.scene.terrain.terrain_generator = None
# no height scan
self.scene.height_scanner = None
self.observations.policy.height_scan = None
# no terrain curriculum
self.curriculum.terrain_levels = None
class AnymalBFlatEnvCfg_PLAY(AnymalBFlatEnvCfg):
def __post_init__(self) -> None:
# post init of parent
super().__post_init__()
# make a smaller scene for play
self.scene.num_envs = 50
self.scene.env_spacing = 2.5
# disable randomization for play
self.observations.policy.enable_corruption = False
# remove random pushing
self.randomization.base_external_force_torque = None
self.randomization.push_robot = None
# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES, ETH Zurich, and University of Toronto
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
from omni.isaac.orbit.utils import configclass
from omni.isaac.orbit_envs.locomotion.velocity.velocity_env_cfg import LocomotionVelocityRoughEnvCfg
##
# Pre-defined configs
##
# isort: off
from omni.isaac.orbit.assets.config.anymal import ANYMAL_B_CFG
@configclass
class AnymalBRoughEnvCfg(LocomotionVelocityRoughEnvCfg):
def __post_init__(self):
# post init of parent
super().__post_init__()
# switch robot to anymal-d
self.scene.robot = ANYMAL_B_CFG.replace(prim_path="{ENV_REGEX_NS}/Robot")
@configclass
class AnymalBRoughEnvCfg_PLAY(AnymalBRoughEnvCfg):
def __post_init__(self):
# post init of parent
super().__post_init__()
# make a smaller scene for play
self.scene.num_envs = 50
self.scene.env_spacing = 2.5
# spawn the robot randomly in the grid (instead of their terrain levels)
self.scene.terrain.max_init_terrain_level = None
# reduce the number of terrains to save memory
if self.scene.terrain.terrain_generator is not None:
self.scene.terrain.terrain_generator.num_rows = 5
self.scene.terrain.terrain_generator.num_cols = 5
self.scene.terrain.terrain_generator.curriculum = False
# disable randomization for play
self.observations.policy.enable_corruption = False
# remove random pushing
self.randomization.base_external_force_torque = None
self.randomization.push_robot = None
# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES, ETH Zurich, and University of Toronto
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
import gym
from . import agents, flat_env_cfg, rough_env_cfg
##
# Register Gym environments.
##
gym.register(
id="Isaac-Velocity-Flat-Anymal-C-v0",
entry_point="omni.isaac.orbit.envs.rl_env:RLEnv",
kwargs={
"env_cfg_entry_point": flat_env_cfg.AnymalCFlatEnvCfg,
"rsl_rl_cfg_entry_point": agents.rsl_rl_cfg.AnymalCFlatPPORunnerCfg,
"skrl_cfg_entry_point": "omni.isaac.orbit_envs.locomotion.velocity.anymal_c.agents:skrl_cfg.yaml",
},
)
gym.register(
id="Isaac-Velocity-Flat-Anymal-C-Play-v0",
entry_point="omni.isaac.orbit.envs.rl_env:RLEnv",
kwargs={
"env_cfg_entry_point": flat_env_cfg.AnymalCFlatEnvCfg_PLAY,
"rsl_rl_cfg_entry_point": agents.rsl_rl_cfg.AnymalCFlatPPORunnerCfg,
},
)
gym.register(
id="Isaac-Velocity-Rough-Anymal-C-v0",
entry_point="omni.isaac.orbit.envs.rl_env:RLEnv",
kwargs={
"env_cfg_entry_point": rough_env_cfg.AnymalCRoughEnvCfg,
"rsl_rl_cfg_entry_point": agents.rsl_rl_cfg.AnymalCRoughPPORunnerCfg,
},
)
gym.register(
id="Isaac-Velocity-Rough-Anymal-C-Play-v0",
entry_point="omni.isaac.orbit.envs.rl_env:RLEnv",
kwargs={
"env_cfg_entry_point": rough_env_cfg.AnymalCRoughEnvCfg_PLAY,
"rsl_rl_cfg_entry_point": agents.rsl_rl_cfg.AnymalCRoughPPORunnerCfg,
},
)
# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES, ETH Zurich, and University of Toronto
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
from . import rsl_rl_cfg # noqa: F401, F403
# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES, ETH Zurich, and University of Toronto
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
from omni.isaac.orbit.utils import configclass
from omni.isaac.orbit_envs.utils.wrappers.rsl_rl import (
RslRlOnPolicyRunnerCfg,
RslRlPpoActorCriticCfg,
RslRlPpoAlgorithmCfg,
)
@configclass
class AnymalCRoughPPORunnerCfg(RslRlOnPolicyRunnerCfg):
num_steps_per_env = 24
max_iterations = 1500
save_interval = 50
experiment_name = "anymal_c_rough"
empirical_normalization = False
policy = RslRlPpoActorCriticCfg(
init_noise_std=1.0,
actor_hidden_dims=[512, 256, 128],
critic_hidden_dims=[512, 256, 128],
activation="elu",
)
algorithm = RslRlPpoAlgorithmCfg(
value_loss_coef=1.0,
use_clipped_value_loss=True,
clip_param=0.2,
entropy_coef=0.005,
num_learning_epochs=5,
num_mini_batches=4,
learning_rate=1.0e-3,
schedule="adaptive",
gamma=0.99,
lam=0.95,
desired_kl=0.01,
max_grad_norm=1.0,
)
@configclass
class AnymalCFlatPPORunnerCfg(AnymalCRoughPPORunnerCfg):
def __post_init__(self):
super().__post_init__()
self.max_iterations = 300
self.experiment_name = "anymal_c_flat"
self.policy.actor_hidden_dims = [128, 128, 128]
self.policy.critic_hidden_dims = [128, 128, 128]
# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES, ETH Zurich, and University of Toronto
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
from omni.isaac.orbit.utils import configclass
from omni.isaac.orbit_envs.locomotion.velocity.velocity_env_cfg import LocomotionVelocityRoughEnvCfg
##
# Pre-defined configs
##
# isort: off
from omni.isaac.orbit.assets.config.anymal import ANYMAL_C_CFG
@configclass
class AnymalCFlatEnvCfg(LocomotionVelocityRoughEnvCfg):
def __post_init__(self):
# post init of parent
super().__post_init__()
# switch robot to anymal-c
self.scene.robot = ANYMAL_C_CFG.replace(prim_path="{ENV_REGEX_NS}/Robot")
# override rewards
self.rewards.flat_orientation_l2.weight = -5.0
self.rewards.dof_torques_l2.weight = -2.5e-5
self.rewards.feet_air_time.weight = 2.0
# change terrain to flat
self.scene.terrain.terrain_type = "plane"
self.scene.terrain.terrain_generator = None
# no height scan
self.scene.height_scanner = None
self.observations.policy.height_scan = None
# no terrain curriculum
self.curriculum.terrain_levels = None
class AnymalCFlatEnvCfg_PLAY(AnymalCFlatEnvCfg):
def __post_init__(self) -> None:
# post init of parent
super().__post_init__()
# make a smaller scene for play
self.scene.num_envs = 50
self.scene.env_spacing = 2.5
# disable randomization for play
self.observations.policy.enable_corruption = False
# remove random pushing
self.randomization.base_external_force_torque = None
self.randomization.push_robot = None
# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES, ETH Zurich, and University of Toronto
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
from omni.isaac.orbit.utils import configclass
from omni.isaac.orbit_envs.locomotion.velocity.velocity_env_cfg import LocomotionVelocityRoughEnvCfg
##
# Pre-defined configs
##
# isort: off
from omni.isaac.orbit.assets.config.anymal import ANYMAL_C_CFG
@configclass
class AnymalCRoughEnvCfg(LocomotionVelocityRoughEnvCfg):
def __post_init__(self):
# post init of parent
super().__post_init__()
# switch robot to anymal-c
self.scene.robot = ANYMAL_C_CFG.replace(prim_path="{ENV_REGEX_NS}/Robot")
@configclass
class AnymalCRoughEnvCfg_PLAY(AnymalCRoughEnvCfg):
def __post_init__(self):
# post init of parent
super().__post_init__()
# make a smaller scene for play
self.scene.num_envs = 50
self.scene.env_spacing = 2.5
# spawn the robot randomly in the grid (instead of their terrain levels)
self.scene.terrain.max_init_terrain_level = None
# reduce the number of terrains to save memory
if self.scene.terrain.terrain_generator is not None:
self.scene.terrain.terrain_generator.num_rows = 5
self.scene.terrain.terrain_generator.num_cols = 5
self.scene.terrain.terrain_generator.curriculum = False
# disable randomization for play
self.observations.policy.enable_corruption = False
# remove random pushing
self.randomization.base_external_force_torque = None
self.randomization.push_robot = None
# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES, ETH Zurich, and University of Toronto
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
import gym
from . import agents, flat_env_cfg, rough_env_cfg
##
# Register Gym environments.
##
gym.register(
id="Isaac-Velocity-Flat-Anymal-D-v0",
entry_point="omni.isaac.orbit.envs.rl_env:RLEnv",
kwargs={
"env_cfg_entry_point": flat_env_cfg.AnymalDFlatEnvCfg,
"rsl_rl_cfg_entry_point": agents.rsl_rl_cfg.AnymalDFlatPPORunnerCfg,
},
)
gym.register(
id="Isaac-Velocity-Flat-Anymal-D-Play-v0",
entry_point="omni.isaac.orbit.envs.rl_env:RLEnv",
kwargs={
"env_cfg_entry_point": flat_env_cfg.AnymalDFlatEnvCfg_PLAY,
"rsl_rl_cfg_entry_point": agents.rsl_rl_cfg.AnymalDFlatPPORunnerCfg,
},
)
gym.register(
id="Isaac-Velocity-Rough-Anymal-D-v0",
entry_point="omni.isaac.orbit.envs.rl_env:RLEnv",
kwargs={
"env_cfg_entry_point": rough_env_cfg.AnymalDRoughEnvCfg,
"rsl_rl_cfg_entry_point": agents.rsl_rl_cfg.AnymalDRoughPPORunnerCfg,
},
)
gym.register(
id="Isaac-Velocity-Rough-Anymal-D-Play-v0",
entry_point="omni.isaac.orbit.envs.rl_env:RLEnv",
kwargs={
"env_cfg_entry_point": rough_env_cfg.AnymalDRoughEnvCfg_PLAY,
"rsl_rl_cfg_entry_point": agents.rsl_rl_cfg.AnymalDRoughPPORunnerCfg,
},
)
# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES, ETH Zurich, and University of Toronto
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
from . import rsl_rl_cfg # noqa: F401, F403
# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES, ETH Zurich, and University of Toronto
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
from omni.isaac.orbit.utils import configclass
from omni.isaac.orbit_envs.utils.wrappers.rsl_rl import (
RslRlOnPolicyRunnerCfg,
RslRlPpoActorCriticCfg,
RslRlPpoAlgorithmCfg,
)
@configclass
class AnymalDRoughPPORunnerCfg(RslRlOnPolicyRunnerCfg):
num_steps_per_env = 24
max_iterations = 1500
save_interval = 50
experiment_name = "anymal_d_rough"
empirical_normalization = False
policy = RslRlPpoActorCriticCfg(
init_noise_std=1.0,
actor_hidden_dims=[512, 256, 128],
critic_hidden_dims=[512, 256, 128],
activation="elu",
)
algorithm = RslRlPpoAlgorithmCfg(
value_loss_coef=1.0,
use_clipped_value_loss=True,
clip_param=0.2,
entropy_coef=0.005,
num_learning_epochs=5,
num_mini_batches=4,
learning_rate=1.0e-3,
schedule="adaptive",
gamma=0.99,
lam=0.95,
desired_kl=0.01,
max_grad_norm=1.0,
)
@configclass
class AnymalDFlatPPORunnerCfg(AnymalDRoughPPORunnerCfg):
def __post_init__(self):
super().__post_init__()
self.max_iterations = 300
self.experiment_name = "anymal_d_flat"
self.policy.actor_hidden_dims = [128, 128, 128]
self.policy.critic_hidden_dims = [128, 128, 128]
# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES, ETH Zurich, and University of Toronto
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
from omni.isaac.orbit.utils import configclass
from omni.isaac.orbit_envs.locomotion.velocity.velocity_env_cfg import LocomotionVelocityRoughEnvCfg
##
# Pre-defined configs
##
# isort: off
from omni.isaac.orbit.assets.config.anymal import ANYMAL_D_CFG
@configclass
class AnymalDFlatEnvCfg(LocomotionVelocityRoughEnvCfg):
def __post_init__(self):
# post init of parent
super().__post_init__()
# switch robot to anymal-d
self.scene.robot = ANYMAL_D_CFG.replace(prim_path="{ENV_REGEX_NS}/Robot")
# override rewards
self.rewards.flat_orientation_l2.weight = -5.0
self.rewards.dof_torques_l2.weight = -2.5e-5
self.rewards.feet_air_time.weight = 2.0
# change terrain to flat
self.scene.terrain.terrain_type = "plane"
self.scene.terrain.terrain_generator = None
# no height scan
self.scene.height_scanner = None
self.observations.policy.height_scan = None
# no terrain curriculum
self.curriculum.terrain_levels = None
class AnymalDFlatEnvCfg_PLAY(AnymalDFlatEnvCfg):
def __post_init__(self) -> None:
# post init of parent
super().__post_init__()
# make a smaller scene for play
self.scene.num_envs = 50
self.scene.env_spacing = 2.5
# disable randomization for play
self.observations.policy.enable_corruption = False
# remove random pushing
self.randomization.base_external_force_torque = None
self.randomization.push_robot = None
# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES, ETH Zurich, and University of Toronto
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
from omni.isaac.orbit.utils import configclass
from omni.isaac.orbit_envs.locomotion.velocity.velocity_env_cfg import LocomotionVelocityRoughEnvCfg
##
# Pre-defined configs
##
# isort: off
from omni.isaac.orbit.assets.config.anymal import ANYMAL_D_CFG
@configclass
class AnymalDRoughEnvCfg(LocomotionVelocityRoughEnvCfg):
def __post_init__(self):
# post init of parent
super().__post_init__()
# switch robot to anymal-d
self.scene.robot = ANYMAL_D_CFG.replace(prim_path="{ENV_REGEX_NS}/Robot")
@configclass
class AnymalDRoughEnvCfg_PLAY(AnymalDRoughEnvCfg):
def __post_init__(self):
# post init of parent
super().__post_init__()
# make a smaller scene for play
self.scene.num_envs = 50
self.scene.env_spacing = 2.5
# spawn the robot randomly in the grid (instead of their terrain levels)
self.scene.terrain.max_init_terrain_level = None
# reduce the number of terrains to save memory
if self.scene.terrain.terrain_generator is not None:
self.scene.terrain.terrain_generator.num_rows = 5
self.scene.terrain.terrain_generator.num_cols = 5
self.scene.terrain.terrain_generator.curriculum = False
# disable randomization for play
self.observations.policy.enable_corruption = False
# remove random pushing
self.randomization.base_external_force_torque = None
self.randomization.push_robot = None
# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES, ETH Zurich, and University of Toronto
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
import gym
from . import agents, flat_env_cfg, rough_env_cfg
##
# Register Gym environments.
##
gym.register(
id="Isaac-Velocity-Flat-Unitree-A1-v0",
entry_point="omni.isaac.orbit.envs.rl_env:RLEnv",
kwargs={
"env_cfg_entry_point": flat_env_cfg.UnitreeA1FlatEnvCfg,
"rsl_rl_cfg_entry_point": agents.rsl_rl_cfg.UnitreeA1FlatPPORunnerCfg,
},
)
gym.register(
id="Isaac-Velocity-Flat-Unitree-A1-Play-v0",
entry_point="omni.isaac.orbit.envs.rl_env:RLEnv",
kwargs={
"env_cfg_entry_point": flat_env_cfg.UnitreeA1FlatEnvCfg_PLAY,
"rsl_rl_cfg_entry_point": agents.rsl_rl_cfg.UnitreeA1FlatPPORunnerCfg,
},
)
gym.register(
id="Isaac-Velocity-Rough-Unitree-A1-v0",
entry_point="omni.isaac.orbit.envs.rl_env:RLEnv",
kwargs={
"env_cfg_entry_point": rough_env_cfg.UnitreeA1RoughEnvCfg,
"rsl_rl_cfg_entry_point": agents.rsl_rl_cfg.UnitreeA1RoughPPORunnerCfg,
},
)
gym.register(
id="Isaac-Velocity-Rough-Unitree-A1-Play-v0",
entry_point="omni.isaac.orbit.envs.rl_env:RLEnv",
kwargs={
"env_cfg_entry_point": rough_env_cfg.UnitreeA1RoughEnvCfg_PLAY,
"rsl_rl_cfg_entry_point": agents.rsl_rl_cfg.UnitreeA1RoughPPORunnerCfg,
},
)
# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES, ETH Zurich, and University of Toronto
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
from . import rsl_rl_cfg # noqa: F401, F403
# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES, ETH Zurich, and University of Toronto
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
from omni.isaac.orbit.utils import configclass
from omni.isaac.orbit_envs.utils.wrappers.rsl_rl import (
RslRlOnPolicyRunnerCfg,
RslRlPpoActorCriticCfg,
RslRlPpoAlgorithmCfg,
)
@configclass
class UnitreeA1RoughPPORunnerCfg(RslRlOnPolicyRunnerCfg):
num_steps_per_env = 24
max_iterations = 1500
save_interval = 50
experiment_name = "unitree_a1_rough"
empirical_normalization = False
policy = RslRlPpoActorCriticCfg(
init_noise_std=1.0,
actor_hidden_dims=[512, 256, 128],
critic_hidden_dims=[512, 256, 128],
activation="elu",
)
algorithm = RslRlPpoAlgorithmCfg(
value_loss_coef=1.0,
use_clipped_value_loss=True,
clip_param=0.2,
entropy_coef=0.01,
num_learning_epochs=5,
num_mini_batches=4,
learning_rate=1.0e-3,
schedule="adaptive",
gamma=0.99,
lam=0.95,
desired_kl=0.01,
max_grad_norm=1.0,
)
@configclass
class UnitreeA1FlatPPORunnerCfg(UnitreeA1RoughPPORunnerCfg):
def __post_init__(self):
super().__post_init__()
self.max_iterations = 300
self.experiment_name = "unitree_a1_flat"
self.policy.actor_hidden_dims = [128, 128, 128]
self.policy.critic_hidden_dims = [128, 128, 128]
# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES, ETH Zurich, and University of Toronto
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
from omni.isaac.orbit.utils import configclass
from omni.isaac.orbit_envs.locomotion.velocity.velocity_env_cfg import LocomotionVelocityRoughEnvCfg
##
# Pre-defined configs
##
# isort: off
from omni.isaac.orbit.assets.config.unitree import UNITREE_A1_CFG
@configclass
class UnitreeA1FlatEnvCfg(LocomotionVelocityRoughEnvCfg):
def __post_init__(self):
# post init of parent
super().__post_init__()
# switch robot to unitree a1
self.scene.robot = UNITREE_A1_CFG.replace(prim_path="{ENV_REGEX_NS}/Robot")
# reduce action scale
self.actions.joint_pos.scale = 0.25
# override rewards
self.rewards.flat_orientation_l2.weight = -2.5
self.rewards.feet_air_time.weight = 1.0
# self.rewards.dof_torques_l2.weight = -2.0e-4
# self.rewards.dof_pos_limits.weight = -10.0
# change terrain to flat
self.scene.terrain.terrain_type = "plane"
self.scene.terrain.terrain_generator = None
# no height scan
self.scene.height_scanner = None
self.observations.policy.height_scan = None
# no terrain curriculum
self.curriculum.terrain_levels = None
# disable pushing for now
self.randomization.push_robot = None
self.randomization.add_base_mass = None
# change body and joint names
# TODO: Change to .*foot once we make a new USD for the robot
self.rewards.feet_air_time.params["sensor_cfg"].body_names = ".*calf"
self.rewards.undesired_contacts.params["sensor_cfg"].body_names = [".*thigh", ".*hip"]
class UnitreeA1FlatEnvCfg_PLAY(UnitreeA1FlatEnvCfg):
def __post_init__(self) -> None:
# post init of parent
super().__post_init__()
# make a smaller scene for play
self.scene.num_envs = 50
self.scene.env_spacing = 2.5
# disable randomization for play
self.observations.policy.enable_corruption = False
# remove random pushing
self.randomization.base_external_force_torque = None
self.randomization.push_robot = None
# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES, ETH Zurich, and University of Toronto
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
from omni.isaac.orbit.utils import configclass
from omni.isaac.orbit_envs.locomotion.velocity.velocity_env_cfg import LocomotionVelocityRoughEnvCfg
##
# Pre-defined configs
##
# isort: off
from omni.isaac.orbit.assets.config.unitree import UNITREE_A1_CFG
@configclass
class UnitreeA1RoughEnvCfg(LocomotionVelocityRoughEnvCfg):
def __post_init__(self):
# post init of parent
super().__post_init__()
# switch robot to unitree-a1
self.scene.robot = UNITREE_A1_CFG.replace(prim_path="{ENV_REGEX_NS}/Robot")
# reduce action scale
self.actions.joint_pos.scale = 0.25
# override rewards
# self.rewards.dof_torques_l2.weight = -2.0e-4
# self.rewards.dof_pos_limits.weight = -10.0
# disable pushing for now
self.randomization.push_robot = None
self.randomization.add_base_mass = None
# change body and joint names
self.rewards.feet_air_time.params["sensor_cfg"].body_names = ".*calf" # TODO: Change to .*foot
self.rewards.undesired_contacts.params["sensor_cfg"].body_names = [".*thigh", ".*hip"]
@configclass
class UnitreeA1RoughEnvCfg_PLAY(UnitreeA1RoughEnvCfg):
def __post_init__(self):
# post init of parent
super().__post_init__()
# make a smaller scene for play
self.scene.num_envs = 50
self.scene.env_spacing = 2.5
# spawn the robot randomly in the grid (instead of their terrain levels)
self.scene.terrain.max_init_terrain_level = None
# reduce the number of terrains to save memory
if self.scene.terrain.terrain_generator is not None:
self.scene.terrain.terrain_generator.num_rows = 5
self.scene.terrain.terrain_generator.num_cols = 5
self.scene.terrain.terrain_generator.curriculum = False
# disable randomization for play
self.observations.policy.enable_corruption = False
# remove random pushing
self.randomization.base_external_force_torque = None
self.randomization.push_robot = None
...@@ -6,9 +6,10 @@ ...@@ -6,9 +6,10 @@
from __future__ import annotations from __future__ import annotations
import math import math
from dataclasses import MISSING
import omni.isaac.orbit.sim as sim_utils import omni.isaac.orbit.sim as sim_utils
from omni.isaac.orbit.assets import AssetBaseCfg from omni.isaac.orbit.assets import ArticulationCfg, AssetBaseCfg
from omni.isaac.orbit.command_generators import UniformVelocityCommandGeneratorCfg from omni.isaac.orbit.command_generators import UniformVelocityCommandGeneratorCfg
from omni.isaac.orbit.envs import RLEnvCfg from omni.isaac.orbit.envs import RLEnvCfg
from omni.isaac.orbit.managers import CurriculumTermCfg as CurrTerm from omni.isaac.orbit.managers import CurriculumTermCfg as CurrTerm
...@@ -24,13 +25,12 @@ from omni.isaac.orbit.terrains import TerrainImporterCfg ...@@ -24,13 +25,12 @@ from omni.isaac.orbit.terrains import TerrainImporterCfg
from omni.isaac.orbit.utils import configclass from omni.isaac.orbit.utils import configclass
from omni.isaac.orbit.utils.noise import AdditiveUniformNoiseCfg as Unoise from omni.isaac.orbit.utils.noise import AdditiveUniformNoiseCfg as Unoise
import omni.isaac.orbit_envs.locomotion.mdp as mdp import omni.isaac.orbit_envs.locomotion.velocity.mdp as mdp
## ##
# Pre-defined configs # Pre-defined configs
## ##
# isort: off # isort: off
from omni.isaac.orbit.assets.config.anymal import ANYMAL_C_CFG
from omni.isaac.orbit.terrains.config.rough import ROUGH_TERRAINS_CFG from omni.isaac.orbit.terrains.config.rough import ROUGH_TERRAINS_CFG
...@@ -40,7 +40,7 @@ from omni.isaac.orbit.terrains.config.rough import ROUGH_TERRAINS_CFG ...@@ -40,7 +40,7 @@ from omni.isaac.orbit.terrains.config.rough import ROUGH_TERRAINS_CFG
@configclass @configclass
class TerrainSceneCfg(InteractiveSceneCfg): class MySceneCfg(InteractiveSceneCfg):
"""Configuration for the terrain scene with a legged robot.""" """Configuration for the terrain scene with a legged robot."""
# ground terrain # ground terrain
...@@ -59,7 +59,7 @@ class TerrainSceneCfg(InteractiveSceneCfg): ...@@ -59,7 +59,7 @@ class TerrainSceneCfg(InteractiveSceneCfg):
debug_vis=False, debug_vis=False,
) )
# robots # robots
robot = ANYMAL_C_CFG.replace(prim_path="{ENV_REGEX_NS}/Robot") robot: ArticulationCfg = MISSING
# sensors # sensors
height_scanner = RayCasterCfg( height_scanner = RayCasterCfg(
prim_path="{ENV_REGEX_NS}/Robot/base", prim_path="{ENV_REGEX_NS}/Robot/base",
...@@ -73,10 +73,7 @@ class TerrainSceneCfg(InteractiveSceneCfg): ...@@ -73,10 +73,7 @@ class TerrainSceneCfg(InteractiveSceneCfg):
# lights # lights
light = AssetBaseCfg( light = AssetBaseCfg(
prim_path="/World/light", prim_path="/World/light",
spawn=sim_utils.DistantLightCfg( spawn=sim_utils.DistantLightCfg(color=(0.75, 0.75, 0.75), intensity=3000.0),
color=(1.0, 1.0, 1.0),
intensity=1000.0,
),
) )
...@@ -217,6 +214,9 @@ class RewardsCfg: ...@@ -217,6 +214,9 @@ class RewardsCfg:
weight=-1.0, weight=-1.0,
params={"sensor_cfg": SceneEntityCfg("contact_forces", body_names=".*THIGH"), "threshold": 1.0}, params={"sensor_cfg": SceneEntityCfg("contact_forces", body_names=".*THIGH"), "threshold": 1.0},
) )
# -- optional penalties
flat_orientation_l2 = RewTerm(func=mdp.flat_orientation_l2, weight=0.0)
dof_pos_limits = RewTerm(func=mdp.joint_pos_limits, weight=0.0)
@configclass @configclass
...@@ -243,11 +243,11 @@ class CurriculumCfg: ...@@ -243,11 +243,11 @@ class CurriculumCfg:
@configclass @configclass
class LocomotionEnvRoughCfg(RLEnvCfg): class LocomotionVelocityRoughEnvCfg(RLEnvCfg):
"""Configuration for the locomotion velocity-tracking environment.""" """Configuration for the locomotion velocity-tracking environment."""
# Scene settings # Scene settings
scene: TerrainSceneCfg = TerrainSceneCfg(num_envs=4096, env_spacing=2.5, replicate_physics=False) scene: MySceneCfg = MySceneCfg(num_envs=4096, env_spacing=2.5, replicate_physics=False)
# Basic settings # Basic settings
observations: ObservationsCfg = ObservationsCfg() observations: ObservationsCfg = ObservationsCfg()
actions: ActionsCfg = ActionsCfg() actions: ActionsCfg = ActionsCfg()
...@@ -279,8 +279,10 @@ class LocomotionEnvRoughCfg(RLEnvCfg): ...@@ -279,8 +279,10 @@ class LocomotionEnvRoughCfg(RLEnvCfg):
self.sim.physics_material = self.scene.terrain.physics_material self.sim.physics_material = self.scene.terrain.physics_material
# update sensor update periods # update sensor update periods
# we tick all the sensors based on the smallest update period (physics update period) # we tick all the sensors based on the smallest update period (physics update period)
self.scene.height_scanner.update_period = self.decimation * self.sim.dt if self.scene.height_scanner is not None:
self.scene.contact_forces.update_period = self.sim.dt self.scene.height_scanner.update_period = self.decimation * self.sim.dt
if self.scene.contact_forces is not None:
self.scene.contact_forces.update_period = self.sim.dt
# check if terrain levels curriculum is enabled - if so, enable curriculum for terrain generator # check if terrain levels curriculum is enabled - if so, enable curriculum for terrain generator
# this generates terrains with increasing difficulty and is useful for training # this generates terrains with increasing difficulty and is useful for training
...@@ -290,25 +292,3 @@ class LocomotionEnvRoughCfg(RLEnvCfg): ...@@ -290,25 +292,3 @@ class LocomotionEnvRoughCfg(RLEnvCfg):
else: else:
if self.scene.terrain.terrain_generator is not None: if self.scene.terrain.terrain_generator is not None:
self.scene.terrain.terrain_generator.curriculum = False self.scene.terrain.terrain_generator.curriculum = False
@configclass
class LocomotionEnvRoughCfg_PLAY(LocomotionEnvRoughCfg):
"""Configuration for the locomotion velocity environment."""
def __post_init__(self) -> None:
super().__post_init__()
# make a smaller scene for play
self.scene.num_envs = 50
self.scene.env_spacing = 2.5
# spawn the robot randomly in the grid (instead of their terrain levels)
self.scene.terrain.max_init_terrain_level = None
# reduce the number of terrains to save memory
if self.scene.terrain.terrain_generator is not None:
self.scene.terrain.terrain_generator.num_rows = 5
self.scene.terrain.terrain_generator.num_cols = 5
self.scene.terrain.terrain_generator.curriculum = False
# disable randomization for play
self.observations.policy.enable_corruption = False
# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES, ETH Zurich, and University of Toronto
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
# TODO: be moved once CabinetEnv exists
from omni.isaac.orbit_envs.utils.wrappers.rsl_rl import (
RslRlOnPolicyRunnerCfg,
RslRlPpoActorCriticCfg,
RslRlPpoAlgorithmCfg,
)
CABINET_RSL_RL_PPO_CFG = RslRlOnPolicyRunnerCfg(
num_steps_per_env=64,
max_iterations=200,
save_interval=50,
experiment_name="cabinet",
run_name="",
resume=False,
load_run=-1,
load_checkpoint=-1,
empirical_normalization=False,
policy=RslRlPpoActorCriticCfg(
init_noise_std=1.0,
actor_hidden_dims=[256, 128, 64],
critic_hidden_dims=[256, 128, 64],
activation="elu",
),
algorithm=RslRlPpoAlgorithmCfg(
value_loss_coef=1.0,
use_clipped_value_loss=True,
clip_param=0.2,
entropy_coef=0.00,
num_learning_epochs=8,
num_mini_batches=8,
learning_rate=5.0e-4,
schedule="adaptive",
gamma=0.99,
lam=0.95,
desired_kl=0.008,
max_grad_norm=1.0,
),
)
...@@ -3,9 +3,28 @@ ...@@ -3,9 +3,28 @@
# #
# SPDX-License-Identifier: BSD-3-Clause # SPDX-License-Identifier: BSD-3-Clause
"""Environment for lifting objects with fixed-arm robots.""" """
Environment for lifting an object with fixed-base robot.
"""
from .lift_cfg import LiftEnvCfg import gym
from .lift_env import LiftEnv
__all__ = ["LiftEnv", "LiftEnvCfg"] from . import agents
##
# Register Gym environments.
##
gym.register(
id="Isaac-Lift-Franka-v0",
entry_point="omni.isaac.orbit.envs.rl_env:RLEnv",
kwargs={
"env_cfg_entry_point": f"{__name__}.lift_env_cfg:LiftEnvCfg",
"rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_ppo_cfg.yaml",
"rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:LIFT_RSL_RL_PPO_CFG",
"skrl_cfg_entry_point": f"{agents.__name__}:skrl_ppo_cfg.yaml",
"sb3_cfg_entry_point": f"{agents.__name__}:sb3_ppo_cfg.yaml",
"robomimic_bc_cfg_entry_point": f"{agents.__name__}.robomimic:bc.json",
"robomimic_bcq_cfg_entry_point": f"{agents.__name__}.robomimic:bcq.json",
},
)
# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES, ETH Zurich, and University of Toronto
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
from omni.isaac.orbit_envs.utils.wrappers.rsl_rl import (
RslRlOnPolicyRunnerCfg,
RslRlPpoActorCriticCfg,
RslRlPpoAlgorithmCfg,
)
LIFT_RSL_RL_PPO_CFG = RslRlOnPolicyRunnerCfg(
num_steps_per_env=96,
max_iterations=700,
save_interval=50,
experiment_name="lift",
run_name="",
resume=False,
load_run=-1,
load_checkpoint=-1,
empirical_normalization=False,
policy=RslRlPpoActorCriticCfg(
init_noise_std=1.0,
actor_hidden_dims=[256, 128, 64],
critic_hidden_dims=[256, 128, 64],
activation="elu",
),
algorithm=RslRlPpoAlgorithmCfg(
value_loss_coef=1.0,
use_clipped_value_loss=True,
clip_param=0.2,
entropy_coef=0.01,
num_learning_epochs=5,
num_mini_batches=4,
learning_rate=1.0e-3,
schedule="adaptive",
gamma=0.99,
lam=0.95,
desired_kl=0.01,
max_grad_norm=1.0,
),
)
...@@ -5,6 +5,21 @@ ...@@ -5,6 +5,21 @@
"""Environment for end-effector pose tracking task for fixed-arm robots.""" """Environment for end-effector pose tracking task for fixed-arm robots."""
from .reach_env import ReachEnv, ReachEnvCfg import gym
__all__ = ["ReachEnv", "ReachEnvCfg"] from . import agents
##
# Register Gym environments.
##
gym.register(
id="Isaac-Reach-Franka-v0",
entry_point="omni.isaac.orbit.envs.rl_env:RLEnv",
kwargs={
"env_cfg_entry_point": f"{__name__}.reach_env_cfg:ReachEnvCfg",
"rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_ppo_cfg.yaml",
"rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:LIFT_RSL_RL_PPO_CFG",
"skrl_cfg_entry_point": f"{agents.__name__}:skrl_ppo_cfg.yaml",
},
)
# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES, ETH Zurich, and University of Toronto
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
from omni.isaac.orbit_envs.utils.wrappers.rsl_rl import (
RslRlOnPolicyRunnerCfg,
RslRlPpoActorCriticCfg,
RslRlPpoAlgorithmCfg,
)
REACH_RSL_RL_PPO_CFG = RslRlOnPolicyRunnerCfg(
num_steps_per_env=64,
max_iterations=250,
save_interval=50,
experiment_name="reach",
run_name="",
resume=False,
load_run=-1,
load_checkpoint=-1,
empirical_normalization=False,
policy=RslRlPpoActorCriticCfg(
init_noise_std=1.0,
actor_hidden_dims=[256, 128, 64],
critic_hidden_dims=[256, 128, 64],
activation="elu",
),
algorithm=RslRlPpoAlgorithmCfg(
value_loss_coef=1.0,
use_clipped_value_loss=True,
clip_param=0.2,
entropy_coef=0.01,
num_learning_epochs=8,
num_mini_batches=64,
learning_rate=3.0e-4,
schedule="adaptive",
gamma=0.99,
lam=0.95,
desired_kl=0.01,
max_grad_norm=1.0,
),
)
...@@ -3,68 +3,8 @@ ...@@ -3,68 +3,8 @@
# #
# SPDX-License-Identifier: BSD-3-Clause # SPDX-License-Identifier: BSD-3-Clause
"""Utilities for environments.""" """Utilities and wrappers for environments."""
from .parse_cfg import get_checkpoint_path, load_cfg_from_registry, parse_env_cfg
import os __all__ = ["load_cfg_from_registry", "parse_env_cfg", "get_checkpoint_path"]
# submodules
from .parse_cfg import load_default_env_cfg, parse_env_cfg
__all__ = ["load_default_env_cfg", "parse_env_cfg", "get_checkpoint_path"]
def get_checkpoint_path(log_path: str, run_dir: str = "*", checkpoint: str = None) -> str:
"""Get path to the model checkpoint in input directory.
The checkpoint file is resolved as: <log_path>/<run_dir>/<checkpoint>.
Args:
log_path: The log directory path to find models in.
run_dir: The name of the directory containing the run. Defaults to the most
recent directory created inside :obj:`log_dir`.
checkpoint: The model checkpoint file or directory name. Defaults to the most recent
recent torch-model saved in the :obj:`run_dir` directory.
Raises:
ValueError: When no runs are found in the input directory.
ValueError: When no checkpoints are found in the input directory.
Returns:
The path to the model checkpoint.
Reference:
https://github.com/leggedrobotics/legged_gym/blob/master/legged_gym/utils/helpers.py#L103
"""
# check if runs present in directory
try:
# find all runs in the directory
runs = [os.path.join(log_path, run) for run in os.scandir(log_path)]
runs = [run for run in runs if os.path.isdir(run)]
# sort by date to handle change of month
runs = sorted(runs, key=os.path.getmtime)
# create last run file path
last_run_path = runs[-1]
except IndexError:
raise ValueError(f"No runs present in the directory: {log_path}")
# path to the directory containing the run
if run_dir.startswith("*"):
# check if there are other paths. Example: "*/nn"
run_path = run_dir.replace("*", last_run_path)
else:
run_path = os.path.join(log_path, run_dir)
# name of model checkpoint
if checkpoint is None:
# list all model checkpoints in the directory
model_checkpoints = [f for f in os.listdir(run_path) if ".pt" in f]
# check if any checkpoints are present
if len(model_checkpoints) == 0:
raise ValueError(f"No checkpoints present in the directory: {run_path}")
# sort by date
model_checkpoints.sort(key=lambda m: f"{m:0>15}")
# get latest model checkpoint file
checkpoint_file = model_checkpoints[-1]
else:
checkpoint_file = checkpoint
return os.path.join(run_path, checkpoint_file)
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
# #
# SPDX-License-Identifier: BSD-3-Clause # SPDX-License-Identifier: BSD-3-Clause
"""Utilities for parsing and loading environments configurations.""" """Utilities for parsing and loading configurations."""
from __future__ import annotations from __future__ import annotations
...@@ -11,35 +11,56 @@ import gym ...@@ -11,35 +11,56 @@ import gym
import importlib import importlib
import inspect import inspect
import os import os
import re
import yaml import yaml
from typing import Any from typing import Any
from omni.isaac.orbit.utils import update_class_from_dict, update_dict from omni.isaac.orbit.utils import update_class_from_dict, update_dict
def load_default_env_cfg(task_name: str) -> dict | Any: def load_cfg_from_registry(task_name: str, entry_point_key: str) -> dict | Any:
"""Load default configuration file for an environment from Gym registry. """Load default configuration given its entry point from the gym registry.
This function resolves the configuration file for environment based on the file type. This function loads the configuration object from the gym registry for the given task name.
It supports both YAML and Python configuration files. It supports both YAML and Python configuration files.
It expects the configuration to be registered in the gym registry as:
.. code-block:: python
gym.register(
id="My-Awesome-Task-v0",
...
kwargs={"env_entry_point_cfg": "path.to.config:ConfigClass"},
)
Usage:
.. code-block:: python
from omni.isaac.orbit_envs.utils.parse_cfg import load_cfg_from_registry
cfg = load_cfg_from_registry("My-Awesome-Task-v0", "env_entry_point_cfg")
Args: Args:
task_name: The name of the environment. task_name: The name of the environment.
entry_point_key: The entry point key to resolve the configuration file.
Returns: Returns:
The parsed configuration object. The parsed configuration object. This is either a dictionary or a class object.
Raises: Raises:
ValueError: If the task name is not provided, i.e. None. ValueError: If the entry point key is not available in the gym registry for the task.
""" """
# check if a task name is provided # obtain the configuration entry point
if task_name is None: cfg_entry_point = gym.spec(task_name)._kwargs.pop(entry_point_key)
raise ValueError("Please provide a valid task name. Hint: Use --task <task_name>.") # check if entry point exists
# retrieve the configuration file to load if cfg_entry_point is None:
cfg_entry_point: str = gym.spec(task_name)._kwargs.pop("cfg_entry_point") raise ValueError(
f"Could not find configuration for the environment: '{task_name}'."
f" Please check that the gym registry has the entry point: '{entry_point_key}'."
)
# parse the default config file # parse the default config file
if cfg_entry_point.endswith(".yaml"): if isinstance(cfg_entry_point, str) and cfg_entry_point.endswith(".yaml"):
if os.path.exists(cfg_entry_point): if os.path.exists(cfg_entry_point):
# absolute path for the config file # absolute path for the config file
config_file = cfg_entry_point config_file = cfg_entry_point
...@@ -50,8 +71,8 @@ def load_default_env_cfg(task_name: str) -> dict | Any: ...@@ -50,8 +71,8 @@ def load_default_env_cfg(task_name: str) -> dict | Any:
# obtain the configuration file path # obtain the configuration file path
config_file = os.path.join(mod_path, file_name) config_file = os.path.join(mod_path, file_name)
# load the configuration # load the configuration
print(f"[INFO]: Parsing default environment configuration from: {config_file}") print(f"[INFO]: Parsing configuration from: {config_file}")
with open(config_file) as f: with open(config_file, encoding="utf-8") as f:
cfg = yaml.full_load(f) cfg = yaml.full_load(f)
else: else:
if callable(cfg_entry_point): if callable(cfg_entry_point):
...@@ -65,22 +86,24 @@ def load_default_env_cfg(task_name: str) -> dict | Any: ...@@ -65,22 +86,24 @@ def load_default_env_cfg(task_name: str) -> dict | Any:
mod = importlib.import_module(mod_name) mod = importlib.import_module(mod_name)
cfg_cls = getattr(mod, attr_name) cfg_cls = getattr(mod, attr_name)
# load the configuration # load the configuration
print(f"[INFO]: Parsing default environment configuration from: {inspect.getfile(cfg_cls)}") print(f"[INFO]: Parsing configuration from: {cfg_entry_point}")
cfg = cfg_cls() if callable(cfg_cls):
cfg = cfg_cls()
else:
cfg = cfg_cls
return cfg return cfg
def parse_env_cfg(task_name: str, use_gpu: bool = True, num_envs: int = None, **kwargs) -> dict | Any: def parse_env_cfg(task_name: str, use_gpu: bool | None = None, num_envs: int | None = None) -> dict | Any:
"""Parse configuration file for an environment and override based on inputs. """Parse configuration for an environment and override based on inputs.
Args: Args:
task_name: The name of the environment. task_name: The name of the environment.
use_gpu: Whether to use GPU/CPU pipeline. Defaults to True. use_gpu: Whether to use GPU/CPU pipeline. Defaults to None, in which case it is left unchanged.
num_envs: Number of environments to create. Defaults to True. num_envs: Number of environments to create. Defaults to None, in which case it is left unchanged.
Returns: Returns:
The parsed configuration object. The parsed configuration object. This is either a dictionary or a class object.
Raises: Raises:
ValueError: If the task name is not provided, i.e. None. ValueError: If the task name is not provided, i.e. None.
...@@ -91,28 +114,22 @@ def parse_env_cfg(task_name: str, use_gpu: bool = True, num_envs: int = None, ** ...@@ -91,28 +114,22 @@ def parse_env_cfg(task_name: str, use_gpu: bool = True, num_envs: int = None, **
# create a dictionary to update from # create a dictionary to update from
args_cfg = {"sim": {"physx": dict()}, "scene": dict()} args_cfg = {"sim": {"physx": dict()}, "scene": dict()}
# resolve pipeline to use (based on input) # resolve pipeline to use (based on input)
if not use_gpu: if use_gpu is not None:
args_cfg["sim"]["use_gpu_pipeline"] = False if not use_gpu:
args_cfg["sim"]["physx"]["use_gpu"] = False args_cfg["sim"]["use_gpu_pipeline"] = False
args_cfg["sim"]["device"] = "cpu" args_cfg["sim"]["physx"]["use_gpu"] = False
else: args_cfg["sim"]["device"] = "cpu"
args_cfg["sim"]["use_gpu_pipeline"] = True else:
args_cfg["sim"]["physx"]["use_gpu"] = True args_cfg["sim"]["use_gpu_pipeline"] = True
args_cfg["sim"]["device"] = "cuda:0" args_cfg["sim"]["physx"]["use_gpu"] = True
# FIXME (15.05.2022): if environment name has soft, then GPU pipeline is not supported yet args_cfg["sim"]["device"] = "cuda:0"
if "soft" in task_name.lower():
args_cfg["sim"]["use_gpu_pipeline"] = False
args_cfg["sim"]["physx"]["use_gpu"] = True
args_cfg["sim"]["device"] = "cpu"
args_cfg["sim"]["use_flatcache"] = False
args_cfg["sim"]["use_fabric"] = False
# number of environments # number of environments
if num_envs is not None: if num_envs is not None:
args_cfg["scene"]["num_envs"] = num_envs args_cfg["scene"]["num_envs"] = num_envs
# load the configuration # load the default configuration
cfg = load_default_env_cfg(task_name) cfg = load_cfg_from_registry(task_name, "env_cfg_entry_point")
# update the main configuration # update the main configuration
if isinstance(cfg, dict): if isinstance(cfg, dict):
cfg = update_dict(cfg, args_cfg) cfg = update_dict(cfg, args_cfg)
...@@ -120,3 +137,59 @@ def parse_env_cfg(task_name: str, use_gpu: bool = True, num_envs: int = None, ** ...@@ -120,3 +137,59 @@ def parse_env_cfg(task_name: str, use_gpu: bool = True, num_envs: int = None, **
update_class_from_dict(cfg, args_cfg) update_class_from_dict(cfg, args_cfg)
return cfg return cfg
def get_checkpoint_path(
log_path: str, run_dir: str = "*", checkpoint: str = "*", sort_alphabetical: bool = True
) -> str:
"""Get path to the model checkpoint in input directory.
The checkpoint file is resolved as: <log_path>/<run_dir>/<checkpoint>.
If run_dir and checkpoint are regex expressions then the most recent (highest alphabetical order) run and checkpoint are selected.
Args:
log_path: The log directory path to find models in.
run_dir: Regex expression for the name of the directory containing the run. Defaults to the most
recent directory created inside :obj:`log_dir`.
checkpoint: The model checkpoint file or directory name. Defaults to the most recent
torch-model saved in the :obj:`run_dir` directory.
sort_alphabetical: Whether to sort the runs and checkpoints by alphabetical order. Defaults to True.
If False, the checkpoints are sorted by the last modified time.
Raises:
ValueError: When no runs are found in the input directory.
ValueError: When no checkpoints are found in the input directory.
Returns:
The path to the model checkpoint.
Reference:
https://github.com/leggedrobotics/legged_gym/blob/master/legged_gym/utils/helpers.py#L103
"""
# check if runs present in directory
try:
# find all runs in the directory that math the regex expression
runs = [
os.path.join(log_path, run) for run in os.scandir(log_path) if run.is_dir() and re.match(run_dir, run.name)
]
# sort matched runs by alphabetical order (latest run should be last)
if sort_alphabetical:
runs.sort()
else:
runs = sorted(runs, key=os.path.getmtime)
# create last run file path
run_path = runs[-1]
except IndexError:
raise ValueError(f"No runs present in the directory: '{log_path}' match: '{run_dir}'.")
# list all model checkpoints in the directory
model_checkpoints = [f for f in os.listdir(run_path) if re.match(checkpoint, f)]
# check if any checkpoints are present
if len(model_checkpoints) == 0:
raise ValueError(f"No checkpoints in the directory: '{run_path}' match '{checkpoint}'.")
# sort alphabetically while ensuring that *_10 comes after *_9
model_checkpoints.sort(key=lambda m: f"{m:0>15}")
# get latest matched checkpoint file
checkpoint_file = model_checkpoints[-1]
return os.path.join(run_path, checkpoint_file)
# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES, ETH Zurich, and University of Toronto
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
from .exporter import export_policy_as_jit, export_policy_as_onnx
from .rl_cfg import *
from .vecenv_wrapper import RslRlVecEnvWrapper
__all__ = [
# wrapper
"RslRlVecEnvWrapper",
# rl-config
"RslRlPpoActorCriticCfg",
"RslRlPpoAlgorithmCfg",
"RslRlOnPolicyRunnerCfg",
# exporters
"export_policy_as_jit",
"export_policy_as_onnx",
]
...@@ -3,125 +3,11 @@ ...@@ -3,125 +3,11 @@
# #
# SPDX-License-Identifier: BSD-3-Clause # SPDX-License-Identifier: BSD-3-Clause
"""Wrapper to configure an :class:`RLEnv` instance to RSL-RL vectorized environment.
The following example shows how to wrap an environment for RSL-RL:
.. code-block:: python
from omni.isaac.orbit_envs.utils.wrappers.rsl_rl import RslRlVecEnvWrapper
env = RslRlVecEnvWrapper(env)
"""
from __future__ import annotations
import copy import copy
import gym
import gym.spaces
import os import os
import torch import torch
from typing import Dict, Optional, Tuple
from omni.isaac.orbit.envs import RLEnv
__all__ = ["RslRlVecEnvWrapper", "export_policy_as_jit", "export_policy_as_onnx"]
__all__ = ["export_policy_as_jit", "export_policy_as_onnx"]
"""
Vectorized environment wrapper.
"""
# VecEnvObs is what is returned by the reset() method
# it contains the observation for each env
VecEnvObs = Tuple[torch.Tensor, Optional[torch.Tensor]]
# VecEnvStepReturn is what is returned by the step() method
# it contains the observation (actor and critic), reward, done, info for each env
VecEnvStepReturn = Tuple[VecEnvObs, VecEnvObs, torch.Tensor, torch.Tensor, Dict]
class RslRlVecEnvWrapper(gym.Wrapper):
"""Wraps around Isaac Orbit environment for RSL-RL.
To use asymmetric actor-critic, the environment instance must have the attributes :attr:`num_states` (int)
and :attr:`state_space` (:obj:`gym.spaces.Box`). These are used by the learning agent to allocate buffers in
the trajectory memory. Additionally, the method :meth:`_get_observations()` should have the key "critic"
which corresponds to the privileged observations. Since this is optional for some environments, the wrapper
checks if these attributes exist. If they don't then the wrapper defaults to zero as number of privileged
observations.
Reference:
https://github.com/leggedrobotics/rsl_rl/blob/master/rsl_rl/env/vec_env.py
"""
def __init__(self, env: RLEnv):
"""Initializes the wrapper.
Args:
env: The environment to wrap around.
Raises:
ValueError: When the environment is not an instance of :class:`RLEnv`.
ValueError: When the observation space is not a :obj:`gym.spaces.Box`.
"""
# check that input is valid
if not isinstance(env.unwrapped, RLEnv):
raise ValueError(f"The environment must be inherited from RLEnv. Environment type: {type(env)}")
# initialize the wrapper
gym.Wrapper.__init__(self, env)
# store information required by wrapper
orbit_env: RLEnv = self.env.unwrapped
self.num_envs = orbit_env.num_envs
self.num_actions = orbit_env.action_manager.total_action_dim
self.num_obs = orbit_env.observation_manager.group_obs_dim["policy"][0]
# reset at the start since the RSL-RL runner does not call reset
self.env.reset()
"""
Properties
"""
def get_observations(self) -> torch.Tensor:
"""Returns the current observations of the environment."""
obs_dict = self.env.unwrapped.observation_manager.compute()
return obs_dict["policy"], {"observations": obs_dict}
@property
def episode_length_buf(self) -> torch.Tensor:
"""The episode length buffer."""
return self.env.unwrapped.episode_length_buf
@episode_length_buf.setter
def episode_length_buf(self, value: torch.Tensor):
"""Set the episode length buffer.
Note: This is needed to perform random initialization of episode lengths in RSL-RL.
"""
self.env.unwrapped.episode_length_buf = value
"""
Operations - MDP
"""
def reset(self) -> VecEnvObs: # noqa: D102
# reset the environment
obs_dict = self.env.reset()
# return observations
return obs_dict["policy"], {"observations": obs_dict}
def step(self, actions: torch.Tensor) -> VecEnvStepReturn: # noqa: D102
# record step information
obs_dict, rew, dones, extras = self.env.step(actions)
# return step information
obs = obs_dict["policy"]
extras["observations"] = obs_dict
return obs, rew, dones.to(torch.long), extras
"""
Helper functions.
"""
def export_policy_as_jit(actor_critic: object, path: str, filename="policy.pt"): def export_policy_as_jit(actor_critic: object, path: str, filename="policy.pt"):
......
# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES, ETH Zurich, and University of Toronto
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
from __future__ import annotations
from dataclasses import MISSING
from omni.isaac.orbit.utils import configclass
__all__ = ["RslRlPpoActorCriticCfg", "RslRlPpoAlgorithmCfg", "RslRlOnPolicyRunnerCfg"]
@configclass
class RslRlPpoActorCriticCfg:
"""Configuration for the PPO actor-critic networks."""
class_name: str = "ActorCritic"
"""The policy class name. Default is ActorCritic."""
init_noise_std: float = MISSING
"""The initial noise standard deviation for the policy."""
actor_hidden_dims: list[int] = MISSING
"""The hidden dimensions of the actor network."""
critic_hidden_dims: list[int] = MISSING
"""The hidden dimensions of the critic network."""
activation: str = MISSING
"""The activation function for the actor and critic networks."""
@configclass
class RslRlPpoAlgorithmCfg:
"""Configuration for the PPO algorithm."""
class_name: str = "PPO"
"""The algorithm class name. Default is PPO."""
value_loss_coef: float = MISSING
"""The coefficient for the value loss."""
use_clipped_value_loss: bool = MISSING
"""Whether to use clipped value loss."""
clip_param: float = MISSING
"""The clipping parameter for the policy."""
entropy_coef: float = MISSING
"""The coefficient for the entropy loss."""
num_learning_epochs: int = MISSING
"""The number of learning epochs per update."""
num_mini_batches: int = MISSING
"""The number of mini-batches per update."""
learning_rate: float = MISSING
"""The learning rate for the policy."""
schedule: str = MISSING
"""The learning rate schedule."""
gamma: float = MISSING
"""The discount factor."""
lam: float = MISSING
"""The lambda parameter for Generalized Advantage Estimation (GAE)."""
desired_kl: float = MISSING
"""The desired KL divergence."""
max_grad_norm: float = MISSING
"""The maximum gradient norm."""
@configclass
class RslRlOnPolicyRunnerCfg:
"""Configuration of the runner for on-policy algorithms."""
seed: int = 42
"""The seed for the experiment. Default is 42."""
device: str = "cuda"
"""The device for the rl-agent. Default is cuda."""
num_steps_per_env: int = MISSING
"""The number of steps per environment per update."""
max_iterations: int = MISSING
"""The maximum number of iterations."""
empirical_normalization: bool = MISSING
"""Whether to use empirical normalization."""
save_interval: int = MISSING
"""The number of iterations between saves."""
experiment_name: str = MISSING
"""The experiment name."""
run_name: str = ""
"""The run name. Default is empty string."""
resume: bool = False
"""Whether to resume. Default is False."""
load_run: str = ".*"
"""The run directory to load. Default is ".*" (all).
If regex expression, the latest (alphabetical order) matching run will be loaded.
"""
load_checkpoint: str = "model_.*.pt"
"""The checkpoint file to load. Default is "model_.*.pt" (all).
If regex expression, the latest (alphabetical order) matching file will be loaded.
"""
policy: RslRlPpoActorCriticCfg = MISSING
"""The policy configuration."""
algorithm: RslRlPpoAlgorithmCfg = MISSING
"""The algorithm configuration."""
# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES, ETH Zurich, and University of Toronto
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
"""Wrapper to configure an :class:`RLEnv` instance to RSL-RL vectorized environment.
The following example shows how to wrap an environment for RSL-RL:
.. code-block:: python
from omni.isaac.orbit_envs.utils.wrappers.rsl_rl import RslRlVecEnvWrapper
env = RslRlVecEnvWrapper(env)
"""
from __future__ import annotations
import gym
import gym.spaces
import torch
from omni.isaac.orbit.envs import RLEnv
class RslRlVecEnvWrapper(gym.Wrapper):
"""Wraps around Isaac Orbit environment for RSL-RL library
To use asymmetric actor-critic, the environment instance must have the attributes :attr:`num_states` (int)
and :attr:`state_space` (:obj:`gym.spaces.Box`). These are used by the learning agent to allocate buffers in
the trajectory memory. Additionally, the method :meth:`_get_observations()` should have the key "critic"
which corresponds to the privileged observations. Since this is optional for some environments, the wrapper
checks if these attributes exist. If they don't then the wrapper defaults to zero as number of privileged
observations.
Reference:
https://github.com/leggedrobotics/rsl_rl/blob/master/rsl_rl/env/vec_env.py
"""
def __init__(self, env: RLEnv):
"""Initializes the wrapper.
Args:
env: The environment to wrap around.
Raises:
ValueError: When the environment is not an instance of :class:`RLEnv`.
"""
# check that input is valid
if not isinstance(env.unwrapped, RLEnv):
raise ValueError(f"The environment must be inherited from RLEnv. Environment type: {type(env)}")
# initialize the wrapper
gym.Wrapper.__init__(self, env)
# store information required by wrapper
orbit_env: RLEnv = self.env.unwrapped
self.num_envs = orbit_env.num_envs
self.num_actions = orbit_env.action_manager.total_action_dim
self.num_obs = orbit_env.observation_manager.group_obs_dim["policy"][0]
# reset at the start since the RSL-RL runner does not call reset
self.env.reset()
"""
Properties
"""
def get_observations(self) -> torch.Tensor:
"""Returns the current observations of the environment."""
obs_dict = self.env.unwrapped.observation_manager.compute()
return obs_dict["policy"], {"observations": obs_dict}
@property
def episode_length_buf(self) -> torch.Tensor:
"""The episode length buffer."""
return self.env.unwrapped.episode_length_buf
@episode_length_buf.setter
def episode_length_buf(self, value: torch.Tensor):
"""Set the episode length buffer.
Note: This is needed to perform random initialization of episode lengths in RSL-RL.
"""
self.env.unwrapped.episode_length_buf = value
"""
Operations - MDP
"""
def reset(self) -> tuple[torch.Tensor, dict]:
# reset the environment
obs_dict = self.env.reset()
# return observations
return obs_dict["policy"], {"observations": obs_dict}
def step(self, actions: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, dict]:
# record step information
obs_dict, rew, dones, extras = self.env.step(actions)
# return step information
obs = obs_dict["policy"]
extras["observations"] = obs_dict
return obs, rew, dones, extras
...@@ -15,18 +15,49 @@ The following example shows how to wrap an environment for Stable-Baselines3: ...@@ -15,18 +15,49 @@ The following example shows how to wrap an environment for Stable-Baselines3:
""" """
from __future__ import annotations
import gym import gym
import numpy as np import numpy as np
import torch import torch
from typing import Any, Dict, List from typing import Any
# stable-baselines3
from stable_baselines3.common.vec_env.base_vec_env import VecEnv, VecEnvObs, VecEnvStepReturn from stable_baselines3.common.vec_env.base_vec_env import VecEnv, VecEnvObs, VecEnvStepReturn
from omni.isaac.orbit.envs import RLEnv from omni.isaac.orbit.envs import RLEnv
__all__ = ["Sb3VecEnvWrapper"] __all__ = ["process_sb3_cfg", "Sb3VecEnvWrapper"]
"""
Configuration Parser.
"""
def process_sb3_cfg(cfg: dict) -> dict:
"""Convert simple YAML types to Stable-Baselines classes/components.
Args:
cfg: A configuration dictionary.
Returns:
A dictionary containing the converted configuration.
Reference:
https://github.com/DLR-RM/rl-baselines3-zoo/blob/0e5eb145faefa33e7d79c7f8c179788574b20da5/utils/exp_manager.py#L358
"""
_direct_eval = ["policy_kwargs", "replay_buffer_class", "replay_buffer_kwargs"]
def update_dict(d):
for key, value in d.items():
if isinstance(value, dict):
update_dict(value)
else:
if key in _direct_eval:
d[key] = eval(value)
return d
# parse agent configuration and convert to classes
return update_dict(cfg)
""" """
...@@ -90,11 +121,11 @@ class Sb3VecEnvWrapper(gym.Wrapper, VecEnv): ...@@ -90,11 +121,11 @@ class Sb3VecEnvWrapper(gym.Wrapper, VecEnv):
Properties Properties
""" """
def get_episode_rewards(self) -> List[float]: def get_episode_rewards(self) -> list[float]:
"""Returns the rewards of all the episodes.""" """Returns the rewards of all the episodes."""
return self._ep_rew_buf.cpu().tolist() return self._ep_rew_buf.cpu().tolist()
def get_episode_lengths(self) -> List[int]: def get_episode_lengths(self) -> list[int]:
"""Returns the number of time-steps of all the episodes.""" """Returns the number of time-steps of all the episodes."""
return self._ep_len_buf.cpu().tolist() return self._ep_len_buf.cpu().tolist()
...@@ -180,10 +211,10 @@ class Sb3VecEnvWrapper(gym.Wrapper, VecEnv): ...@@ -180,10 +211,10 @@ class Sb3VecEnvWrapper(gym.Wrapper, VecEnv):
raise NotImplementedError(f"Unsupported backend for simulation: {self.env.sim.backend}") raise NotImplementedError(f"Unsupported backend for simulation: {self.env.sim.backend}")
return obs return obs
def _process_extras(self, obs, dones, extras, reset_ids) -> List[Dict[str, Any]]: def _process_extras(self, obs, dones, extras, reset_ids) -> list[dict[str, Any]]:
"""Convert miscellaneous information into dictionary for each sub-environment.""" """Convert miscellaneous information into dictionary for each sub-environment."""
# create empty list of dictionaries to fill # create empty list of dictionaries to fill
infos: List[Dict[str, Any]] = [dict.fromkeys(extras.keys()) for _ in range(self.env.num_envs)] infos: list[dict[str, Any]] = [dict.fromkeys(extras.keys()) for _ in range(self.env.num_envs)]
# fill-in information for each sub-environment # fill-in information for each sub-environment
# Note: This loop becomes slow when number of environments is large. # Note: This loop becomes slow when number of environments is large.
for idx in range(self.env.num_envs): for idx in range(self.env.num_envs):
......
...@@ -29,16 +29,63 @@ import copy ...@@ -29,16 +29,63 @@ import copy
import torch import torch
import tqdm import tqdm
# skrl
from skrl.agents.torch import Agent from skrl.agents.torch import Agent
from skrl.envs.torch.wrappers import Wrapper, wrap_env from skrl.envs.torch.wrappers import Wrapper, wrap_env
from skrl.resources.preprocessors.torch import RunningStandardScaler # noqa: F401
from skrl.resources.schedulers.torch import KLAdaptiveRL # noqa: F401
from skrl.trainers.torch import Trainer from skrl.trainers.torch import Trainer
from skrl.trainers.torch.sequential import SEQUENTIAL_TRAINER_DEFAULT_CONFIG from skrl.trainers.torch.sequential import SEQUENTIAL_TRAINER_DEFAULT_CONFIG
from skrl.utils.model_instantiators import Shape # noqa: F401
from omni.isaac.orbit.envs import RLEnv from omni.isaac.orbit.envs import RLEnv
__all__ = ["SkrlVecEnvWrapper", "SkrlSequentialLogTrainer"] __all__ = ["SkrlVecEnvWrapper", "SkrlSequentialLogTrainer"]
"""
Configuration Parser.
"""
def process_skrl_cfg(cfg: dict) -> dict:
"""Convert simple YAML types to skrl classes/components.
Args:
cfg: A configuration dictionary.
Returns:
A dictionary containing the converted configuration.
"""
_direct_eval = [
"learning_rate_scheduler",
"state_preprocessor",
"value_preprocessor",
"input_shape",
"output_shape",
]
def reward_shaper_function(scale):
def reward_shaper(rewards, timestep, timesteps):
return rewards * scale
return reward_shaper
def update_dict(d):
for key, value in d.items():
if isinstance(value, dict):
update_dict(value)
else:
if key in _direct_eval:
d[key] = eval(value)
elif key.endswith("_kwargs"):
d[key] = value if value is not None else {}
elif key in ["rewards_shaper_scale"]:
d["rewards_shaper"] = reward_shaper_function(value)
return d
# parse agent configuration and convert to classes
return update_dict(cfg)
""" """
Vectorized environment wrapper. Vectorized environment wrapper.
......
...@@ -50,7 +50,7 @@ def main(): ...@@ -50,7 +50,7 @@ def main():
for task_spec in gym.envs.registry.all(): for task_spec in gym.envs.registry.all():
if "Isaac" in task_spec.id: if "Isaac" in task_spec.id:
# add details to table # add details to table
table.add_row([index + 1, task_spec.id, task_spec.entry_point, task_spec._kwargs["cfg_entry_point"]]) table.add_row([index + 1, task_spec.id, task_spec.entry_point, task_spec._kwargs["env_cfg_entry_point"]])
# increment count # increment count
index += 1 index += 1
......
# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES, ETH Zurich, and University of Toronto
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
"""Utility functions for parsing rl-games configuration files."""
from __future__ import annotations
import os
import yaml
from omni.isaac.orbit_envs import ORBIT_ENVS_DATA_DIR
__all__ = ["RLG_PPO_CONFIG_FILE", "parse_rlg_cfg"]
RLG_PPO_CONFIG_FILE = {
# classic
"Isaac-Cartpole-v0": os.path.join(ORBIT_ENVS_DATA_DIR, "rl_games/cartpole_ppo.yaml"),
"Isaac-Ant-v0": os.path.join(ORBIT_ENVS_DATA_DIR, "rl_games/ant_ppo.yaml"),
"Isaac-Humanoid-v0": os.path.join(ORBIT_ENVS_DATA_DIR, "rl_games/humanoid_ppo.yaml"),
# manipulation
"Isaac-Lift-Franka-v0": os.path.join(ORBIT_ENVS_DATA_DIR, "rl_games/lift_ppo.yaml"),
"Isaac-Reach-Franka-v0": os.path.join(ORBIT_ENVS_DATA_DIR, "rl_games/reach_ppo.yaml"),
}
"""Mapping from environment names to PPO agent files."""
def parse_rlg_cfg(task_name) -> dict:
"""Parse configuration based on command line arguments.
Args:
task_name: The name of the environment.
Returns:
A dictionary containing the parsed configuration.
"""
# retrieve the default environment config file
try:
config_file = RLG_PPO_CONFIG_FILE[task_name]
except KeyError:
raise ValueError(f"Task not found: {task_name}")
# parse agent configuration
with open(config_file, encoding="utf-8") as f:
cfg = yaml.load(f, Loader=yaml.Loader)
return cfg
...@@ -50,17 +50,15 @@ from rl_games.torch_runner import Runner ...@@ -50,17 +50,15 @@ from rl_games.torch_runner import Runner
import omni.isaac.contrib_envs # noqa: F401 import omni.isaac.contrib_envs # noqa: F401
import omni.isaac.orbit_envs # noqa: F401 import omni.isaac.orbit_envs # noqa: F401
from omni.isaac.orbit_envs.utils import get_checkpoint_path, parse_env_cfg from omni.isaac.orbit_envs.utils import get_checkpoint_path, load_cfg_from_registry, parse_env_cfg
from omni.isaac.orbit_envs.utils.wrappers.rl_games import RlGamesGpuEnv, RlGamesVecEnvWrapper from omni.isaac.orbit_envs.utils.wrappers.rl_games import RlGamesGpuEnv, RlGamesVecEnvWrapper
from config import parse_rlg_cfg
def main(): def main():
"""Play with RL-Games agent.""" """Play with RL-Games agent."""
# parse env configuration # parse env configuration
env_cfg = parse_env_cfg(args_cli.task, use_gpu=not args_cli.cpu, num_envs=args_cli.num_envs) env_cfg = parse_env_cfg(args_cli.task, use_gpu=not args_cli.cpu, num_envs=args_cli.num_envs)
agent_cfg = parse_rlg_cfg(args_cli.task) agent_cfg = load_cfg_from_registry(args_cli.task, "rl_games_cfg_entry_point")
# wrap around environment for rl-games # wrap around environment for rl-games
rl_device = agent_cfg["params"]["config"]["device"] rl_device = agent_cfg["params"]["config"]["device"]
......
...@@ -57,11 +57,9 @@ from omni.isaac.orbit.utils.io import dump_pickle, dump_yaml ...@@ -57,11 +57,9 @@ from omni.isaac.orbit.utils.io import dump_pickle, dump_yaml
import omni.isaac.contrib_envs # noqa: F401 import omni.isaac.contrib_envs # noqa: F401
import omni.isaac.orbit_envs # noqa: F401 import omni.isaac.orbit_envs # noqa: F401
from omni.isaac.orbit_envs.utils import parse_env_cfg from omni.isaac.orbit_envs.utils import load_cfg_from_registry, parse_env_cfg
from omni.isaac.orbit_envs.utils.wrappers.rl_games import RlGamesGpuEnv, RlGamesVecEnvWrapper from omni.isaac.orbit_envs.utils.wrappers.rl_games import RlGamesGpuEnv, RlGamesVecEnvWrapper
from config import parse_rlg_cfg
def main(): def main():
"""Train with RL-Games agent.""" """Train with RL-Games agent."""
...@@ -70,7 +68,7 @@ def main(): ...@@ -70,7 +68,7 @@ def main():
# parse configuration # parse configuration
env_cfg = parse_env_cfg(args_cli.task, use_gpu=not args_cli.cpu, num_envs=args_cli.num_envs) env_cfg = parse_env_cfg(args_cli.task, use_gpu=not args_cli.cpu, num_envs=args_cli.num_envs)
agent_cfg = parse_rlg_cfg(args_cli.task) agent_cfg = load_cfg_from_registry(args_cli.task, "rl_games_cfg_entry_point")
# override from command line # override from command line
if args_cli_seed is not None: if args_cli_seed is not None:
agent_cfg["params"]["seed"] = args_cli_seed agent_cfg["params"]["seed"] = args_cli_seed
...@@ -80,7 +78,7 @@ def main(): ...@@ -80,7 +78,7 @@ def main():
log_root_path = os.path.abspath(log_root_path) log_root_path = os.path.abspath(log_root_path)
print(f"[INFO] Logging experiment in directory: {log_root_path}") print(f"[INFO] Logging experiment in directory: {log_root_path}")
# specify directory for logging runs # specify directory for logging runs
log_dir = agent_cfg["params"]["config"].get("full_experiment_name", datetime.now().strftime("%b%d_%H-%M-%S")) log_dir = agent_cfg["params"]["config"].get("full_experiment_name", datetime.now().strftime("%Y-%m-%d_%H-%M-%S"))
# set directory into agent config # set directory into agent config
# logging directory path: <train_dir>/<full_experiment_name> # logging directory path: <train_dir>/<full_experiment_name>
agent_cfg["params"]["config"]["train_dir"] = log_root_path agent_cfg["params"]["config"]["train_dir"] = log_root_path
......
# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES, ETH Zurich, and University of Toronto
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
"""Utility functions for parsing robomimic configuration files."""
from __future__ import annotations
import os
from omni.isaac.orbit_envs import ORBIT_ENVS_DATA_DIR
ROBOMIMIC_CONFIG_FILES_DICT = {
"Isaac-Lift-Franka-v0": {
"bc": os.path.join(ORBIT_ENVS_DATA_DIR, "robomimic/lift_bc.json"),
"bcq": os.path.join(ORBIT_ENVS_DATA_DIR, "robomimic/lift_bcq.json"),
}
}
"""Mapping from environment names to imitation learning config files."""
__all__ = ["ROBOMIMIC_CONFIG_FILES_DICT"]
...@@ -34,11 +34,27 @@ Args: ...@@ -34,11 +34,27 @@ Args:
task: name of the environment. task: name of the environment.
name: if provided, override the experiment name defined in the config name: if provided, override the experiment name defined in the config
dataset: if provided, override the dataset path defined in the config dataset: if provided, override the dataset path defined in the config
This file has been modified from the original version in the following ways:
* Added import of AppLauncher from omni.isaac.orbit.app to resolve the configuration to load for training.
""" """
from __future__ import annotations from __future__ import annotations
"""Launch Isaac Sim Simulator first."""
from omni.isaac.orbit.app import AppLauncher
# launch omniverse app
app_launcher = AppLauncher(headless=True)
simulation_app = app_launcher.app
"""Rest everything follows."""
import argparse import argparse
import gym
import json import json
import numpy as np import numpy as np
import os import os
...@@ -59,8 +75,6 @@ from robomimic.algo import RolloutPolicy, algo_factory ...@@ -59,8 +75,6 @@ from robomimic.algo import RolloutPolicy, algo_factory
from robomimic.config import config_factory from robomimic.config import config_factory
from robomimic.utils.log_utils import DataLogger, PrintLogger from robomimic.utils.log_utils import DataLogger, PrintLogger
from config import ROBOMIMIC_CONFIG_FILES_DICT
def train(config, device): def train(config, device):
"""Train a model using the algorithm.""" """Train a model using the algorithm."""
...@@ -331,8 +345,17 @@ def main(args): ...@@ -331,8 +345,17 @@ def main(args):
"""Train a model on a task using a specified algorithm.""" """Train a model on a task using a specified algorithm."""
# load config # load config
if args.task is not None: if args.task is not None:
# obtain the configuration entry point
cfg_entry_point_key = f"robomimic_{args.algo}_cfg_entry_point"
cfg_entry_point_file = gym.spec(args.task)._kwargs.pop(cfg_entry_point_key)
# check if entry point exists
if cfg_entry_point_file is None:
raise ValueError(
f"Could not find configuration for the environment: '{args.task}'."
f" Please check that the gym registry has the entry point: '{cfg_entry_point_key}'."
)
# load config from json file # load config from json file
with open(ROBOMIMIC_CONFIG_FILES_DICT[args.task][args.algo]) as f: with open(cfg_entry_point_file) as f:
ext_cfg = json.load(f) ext_cfg = json.load(f)
config = config_factory(ext_cfg["algo_name"]) config = config_factory(ext_cfg["algo_name"])
# update config with external json - this will throw errors if # update config with external json - this will throw errors if
...@@ -387,4 +410,12 @@ if __name__ == "__main__": ...@@ -387,4 +410,12 @@ if __name__ == "__main__":
parser.add_argument("--algo", type=str, default=None, help="Name of the algorithm.") parser.add_argument("--algo", type=str, default=None, help="Name of the algorithm.")
args = parser.parse_args() args = parser.parse_args()
main(args)
try:
# run training
main(args)
except Exception:
raise
finally:
# close sim app
simulation_app.close()
from __future__ import annotations
import argparse
from typing import TYPE_CHECKING
if TYPE_CHECKING:
from omni.isaac.orbit_envs.utils.wrappers.rsl_rl import RslRlOnPolicyRunnerCfg
def add_rsl_rl_args(parser: argparse.ArgumentParser):
"""Add RSL-RL arguments to the parser.
Args:
parser: The parser to add the arguments to.
"""
# create a new argument group
arg_group = parser.add_argument_group("rsl_rl", description="Arguments for RSL-RL agent.")
arg_group.add_argument(
"--experiment_name", type=str, default=None, help="Name of the experiment folder where logs will be stored."
)
arg_group.add_argument("--resume", type=bool, default=None, help="Whether to resume from a checkpoint.")
arg_group.add_argument("--load_run", type=str, default=None, help="Name of the run folder to resume from.")
arg_group.add_argument("--checkpoint", type=str, default=None, help="Checkpoint file to resume from.")
def parse_rsl_rl_cfg(task_name: str, args_cli: argparse.Namespace) -> RslRlOnPolicyRunnerCfg:
"""Parse configuration for RSL-RL agent based on inputs.
Args:
task_name: The name of the environment.
args_cli: The command line arguments.
Returns:
The parsed configuration for RSL-RL agent based on inputs.
"""
from omni.isaac.orbit_envs.utils.parse_cfg import load_cfg_from_registry
# load the default configuration
rslrl_cfg: RslRlOnPolicyRunnerCfg = load_cfg_from_registry(task_name, "rsl_rl_cfg_entry_point")
# override the default configuration with CLI arguments
if args_cli.seed is not None:
rslrl_cfg.seed = args_cli.seed
if args_cli.resume is not None:
rslrl_cfg.resume = args_cli.resume
if args_cli.load_run is not None:
rslrl_cfg.load_run = args_cli.load_run
if args_cli.checkpoint is not None:
rslrl_cfg.load_checkpoint = args_cli.checkpoint
return rslrl_cfg
# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES, ETH Zurich, and University of Toronto
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
"""Utility functions for parsing rsl-rl configuration files."""
from __future__ import annotations
import os
import yaml
from omni.isaac.orbit_envs import ORBIT_ENVS_DATA_DIR
__all__ = ["RSLRL_PPO_CONFIG_FILE", "parse_rslrl_cfg"]
RSLRL_PPO_CONFIG_FILE = {
# classic
"Isaac-Cartpole-v0": os.path.join(ORBIT_ENVS_DATA_DIR, "rsl_rl/cartpole_ppo.yaml"),
# manipulation
"Isaac-Lift-Franka-v0": os.path.join(ORBIT_ENVS_DATA_DIR, "rsl_rl/lift_ppo.yaml"),
"Isaac-Reach-Franka-v0": os.path.join(ORBIT_ENVS_DATA_DIR, "rsl_rl/reach_ppo.yaml"),
# locomotion
"Isaac-Velocity-Rough-Anymal-C-v0": os.path.join(ORBIT_ENVS_DATA_DIR, "rsl_rl/anymal_ppo.yaml"),
"Isaac-Velocity-Rough-Anymal-C-Play-v0": os.path.join(ORBIT_ENVS_DATA_DIR, "rsl_rl/anymal_ppo.yaml"),
}
"""Mapping from environment names to PPO agent files."""
def parse_rslrl_cfg(task_name) -> dict:
"""Parse configuration for RSL-RL agent based on inputs.
Args:
task_name: The name of the environment.
Returns:
A dictionary containing the parsed configuration.
"""
# retrieve the default environment config file
try:
config_file = RSLRL_PPO_CONFIG_FILE[task_name]
except KeyError:
raise ValueError(f"Task not found: {task_name}")
# parse agent configuration
with open(config_file, encoding="utf-8") as f:
cfg = yaml.load(f, Loader=yaml.FullLoader)
return cfg
...@@ -14,15 +14,19 @@ import argparse ...@@ -14,15 +14,19 @@ import argparse
from omni.isaac.orbit.app import AppLauncher from omni.isaac.orbit.app import AppLauncher
# local imports
import source.standalone.workflows.rsl_rl.cli_args as cli_args # isort: skip
# add argparse arguments # add argparse arguments
parser = argparse.ArgumentParser(description="Play a checkpoint of an RL agent from RSL-RL.") parser = argparse.ArgumentParser(description="Train an RL agent with RSL-RL.")
parser.add_argument("--cpu", action="store_true", default=False, help="Use CPU pipeline.") parser.add_argument("--cpu", action="store_true", default=False, help="Use CPU pipeline.")
parser.add_argument("--num_envs", type=int, default=None, help="Number of environments to simulate.") parser.add_argument("--num_envs", type=int, default=None, help="Number of environments to simulate.")
parser.add_argument("--task", type=str, default=None, help="Name of the task.") parser.add_argument("--task", type=str, default=None, help="Name of the task.")
parser.add_argument("--checkpoint", type=str, default=None, help="Path to model checkpoint.") parser.add_argument("--seed", type=int, default=None, help="Seed used for the environment")
# append RSL-RL cli arguments
cli_args.add_rsl_rl_args(parser)
# append AppLauncher cli args # append AppLauncher cli args
AppLauncher.add_app_launcher_args(parser) AppLauncher.add_app_launcher_args(parser)
# parse the arguments
args_cli = parser.parse_args() args_cli = parser.parse_args()
# launch omniverse app # launch omniverse app
...@@ -43,16 +47,18 @@ from rsl_rl.runners import OnPolicyRunner ...@@ -43,16 +47,18 @@ from rsl_rl.runners import OnPolicyRunner
import omni.isaac.contrib_envs # noqa: F401 import omni.isaac.contrib_envs # noqa: F401
import omni.isaac.orbit_envs # noqa: F401 import omni.isaac.orbit_envs # noqa: F401
from omni.isaac.orbit_envs.utils import get_checkpoint_path, parse_env_cfg from omni.isaac.orbit_envs.utils import get_checkpoint_path, parse_env_cfg
from omni.isaac.orbit_envs.utils.wrappers.rsl_rl import RslRlVecEnvWrapper, export_policy_as_onnx from omni.isaac.orbit_envs.utils.wrappers.rsl_rl import (
RslRlOnPolicyRunnerCfg,
from config import parse_rslrl_cfg RslRlVecEnvWrapper,
export_policy_as_onnx,
)
def main(): def main():
"""Play with RSL-RL agent.""" """Play with RSL-RL agent."""
# parse configuration # parse configuration
env_cfg = parse_env_cfg(args_cli.task, use_gpu=not args_cli.cpu, num_envs=args_cli.num_envs) env_cfg = parse_env_cfg(args_cli.task, use_gpu=not args_cli.cpu, num_envs=args_cli.num_envs)
agent_cfg = parse_rslrl_cfg(args_cli.task) agent_cfg: RslRlOnPolicyRunnerCfg = cli_args.parse_rsl_rl_cfg(args_cli.task, args_cli)
# create isaac environment # create isaac environment
env = gym.make(args_cli.task, cfg=env_cfg) env = gym.make(args_cli.task, cfg=env_cfg)
...@@ -60,29 +66,17 @@ def main(): ...@@ -60,29 +66,17 @@ def main():
env = RslRlVecEnvWrapper(env) env = RslRlVecEnvWrapper(env)
# specify directory for logging experiments # specify directory for logging experiments
log_root_path = os.path.join("logs", "rsl_rl", agent_cfg["runner"]["experiment_name"]) log_root_path = os.path.join("logs", "rsl_rl", agent_cfg.experiment_name)
log_root_path = os.path.abspath(log_root_path) log_root_path = os.path.abspath(log_root_path)
print(f"[INFO] Loading experiment from directory: {log_root_path}") print(f"[INFO] Loading experiment from directory: {log_root_path}")
# get path to previous checkpoint resume_path = get_checkpoint_path(log_root_path, agent_cfg.load_run, agent_cfg.load_checkpoint)
if args_cli.checkpoint is None:
# specify directory for logging runs
run_dir = "*"
if agent_cfg["runner"]["load_run"] != -1:
run_dir = agent_cfg["runner"]["load_run"]
# specify name of checkpoint
checkpoint_file = None
if agent_cfg["runner"]["checkpoint"] != -1:
checkpoint_file = f"model_{agent_cfg['runner']['checkpoint']}.pt"
# get path to previous checkpoint
resume_path = get_checkpoint_path(log_root_path, run_dir, checkpoint_file)
else:
resume_path = os.path.abspath(args_cli.checkpoint)
# load previously trained model
print(f"[INFO]: Loading model checkpoint from: {resume_path}") print(f"[INFO]: Loading model checkpoint from: {resume_path}")
# create runner from rsl-rl # load previously trained model
ppo_runner = OnPolicyRunner(env, agent_cfg, log_dir=None, device=agent_cfg["device"]) ppo_runner = OnPolicyRunner(env, agent_cfg.to_dict(), log_dir=None, device=agent_cfg.device)
ppo_runner.load(resume_path) ppo_runner.load(resume_path)
print(f"[INFO]: Loading model checkpoint from: {resume_path}")
# obtain the trained policy for inference # obtain the trained policy for inference
policy = ppo_runner.get_inference_policy(device=env.unwrapped.device) policy = ppo_runner.get_inference_policy(device=env.unwrapped.device)
......
...@@ -15,6 +15,10 @@ import os ...@@ -15,6 +15,10 @@ import os
from omni.isaac.orbit.app import AppLauncher from omni.isaac.orbit.app import AppLauncher
# local imports
import source.standalone.workflows.rsl_rl.cli_args as cli_args # isort: skip
# add argparse arguments # add argparse arguments
parser = argparse.ArgumentParser(description="Train an RL agent with RSL-RL.") parser = argparse.ArgumentParser(description="Train an RL agent with RSL-RL.")
parser.add_argument("--video", action="store_true", default=False, help="Record videos during training.") parser.add_argument("--video", action="store_true", default=False, help="Record videos during training.")
...@@ -24,9 +28,10 @@ parser.add_argument("--cpu", action="store_true", default=False, help="Use CPU p ...@@ -24,9 +28,10 @@ parser.add_argument("--cpu", action="store_true", default=False, help="Use CPU p
parser.add_argument("--num_envs", type=int, default=None, help="Number of environments to simulate.") parser.add_argument("--num_envs", type=int, default=None, help="Number of environments to simulate.")
parser.add_argument("--task", type=str, default=None, help="Name of the task.") parser.add_argument("--task", type=str, default=None, help="Name of the task.")
parser.add_argument("--seed", type=int, default=None, help="Seed used for the environment") parser.add_argument("--seed", type=int, default=None, help="Seed used for the environment")
# append RSL-RL cli arguments
cli_args.add_rsl_rl_args(parser)
# append AppLauncher cli args # append AppLauncher cli args
AppLauncher.add_app_launcher_args(parser) AppLauncher.add_app_launcher_args(parser)
# parse the arguments
args_cli = parser.parse_args() args_cli = parser.parse_args()
# load cheaper kit config in headless # load cheaper kit config in headless
...@@ -51,15 +56,14 @@ from datetime import datetime ...@@ -51,15 +56,14 @@ from datetime import datetime
import carb import carb
from rsl_rl.runners import OnPolicyRunner from rsl_rl.runners import OnPolicyRunner
from omni.isaac.orbit.envs.rl_env_cfg import RLEnvCfg
from omni.isaac.orbit.utils.dict import print_dict from omni.isaac.orbit.utils.dict import print_dict
from omni.isaac.orbit.utils.io import dump_pickle, dump_yaml from omni.isaac.orbit.utils.io import dump_pickle, dump_yaml
import omni.isaac.contrib_envs # noqa: F401 import omni.isaac.contrib_envs # noqa: F401
import omni.isaac.orbit_envs # noqa: F401 import omni.isaac.orbit_envs # noqa: F401
from omni.isaac.orbit_envs.utils import get_checkpoint_path, parse_env_cfg from omni.isaac.orbit_envs.utils import get_checkpoint_path, parse_env_cfg
from omni.isaac.orbit_envs.utils.wrappers.rsl_rl import RslRlVecEnvWrapper from omni.isaac.orbit_envs.utils.wrappers.rsl_rl import RslRlOnPolicyRunnerCfg, RslRlVecEnvWrapper
from config import parse_rslrl_cfg
torch.backends.cuda.matmul.allow_tf32 = True torch.backends.cuda.matmul.allow_tf32 = True
torch.backends.cudnn.allow_tf32 = True torch.backends.cudnn.allow_tf32 = True
...@@ -70,28 +74,19 @@ torch.backends.cudnn.benchmark = False ...@@ -70,28 +74,19 @@ torch.backends.cudnn.benchmark = False
def main(): def main():
"""Train with RSL-RL agent.""" """Train with RSL-RL agent."""
# parse configuration # parse configuration
env_cfg = parse_env_cfg(args_cli.task, use_gpu=not args_cli.cpu, num_envs=args_cli.num_envs) env_cfg: RLEnvCfg = parse_env_cfg(args_cli.task, use_gpu=not args_cli.cpu, num_envs=args_cli.num_envs)
agent_cfg = parse_rslrl_cfg(args_cli.task) agent_cfg: RslRlOnPolicyRunnerCfg = cli_args.parse_rsl_rl_cfg(args_cli.task, args_cli)
# override config from command line
if args_cli.seed is not None:
agent_cfg["seed"] = args_cli.seed
# specify directory for logging experiments # specify directory for logging experiments
log_root_path = os.path.join("logs", "rsl_rl", agent_cfg["runner"]["experiment_name"]) log_root_path = os.path.join("logs", "rsl_rl", agent_cfg.experiment_name)
log_root_path = os.path.abspath(log_root_path) log_root_path = os.path.abspath(log_root_path)
print(f"[INFO] Logging experiment in directory: {log_root_path}") print(f"[INFO] Logging experiment in directory: {log_root_path}")
# specify directory for logging runs # specify directory for logging runs
log_dir = datetime.now().strftime("%b%d_%H-%M-%S") log_dir = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
if agent_cfg["runner"]["run_name"]: if agent_cfg.run_name:
log_dir += f"_{agent_cfg['runner']['run_name']}" log_dir += f"_{agent_cfg.run_name}"
log_dir = os.path.join(log_root_path, log_dir) log_dir = os.path.join(log_root_path, log_dir)
# dump the configuration into log-directory
dump_yaml(os.path.join(log_dir, "params", "env.yaml"), env_cfg)
dump_yaml(os.path.join(log_dir, "params", "agent.yaml"), agent_cfg)
dump_pickle(os.path.join(log_dir, "params", "env.pkl"), env_cfg)
dump_pickle(os.path.join(log_dir, "params", "agent.pkl"), agent_cfg)
# create isaac environment # create isaac environment
env = gym.make(args_cli.task, cfg=env_cfg) env = gym.make(args_cli.task, cfg=env_cfg)
# wrap for video recording # wrap for video recording
...@@ -108,27 +103,26 @@ def main(): ...@@ -108,27 +103,26 @@ def main():
env = RslRlVecEnvWrapper(env) env = RslRlVecEnvWrapper(env)
# create runner from rsl-rl # create runner from rsl-rl
runner = OnPolicyRunner(env, agent_cfg, log_dir=log_dir, device=agent_cfg["device"]) runner = OnPolicyRunner(env, agent_cfg.to_dict(), log_dir=log_dir, device=agent_cfg.device)
# save resume path before creating a new log_dir # save resume path before creating a new log_dir
if agent_cfg["runner"]["resume"]: if agent_cfg.resume:
# specify directory for logging runs
run_dir = "*"
if agent_cfg["runner"]["load_run"] != -1:
run_dir = agent_cfg["runner"]["load_run"]
# specify name of checkpoint
checkpoint_file = None
if agent_cfg["runner"]["checkpoint"] != -1:
checkpoint_file = f"model_{agent_cfg['runner']['checkpoint']}.pt"
# get path to previous checkpoint # get path to previous checkpoint
resume_path = get_checkpoint_path(log_root_path, run_dir, checkpoint_file) resume_path = get_checkpoint_path(log_root_path, agent_cfg.load_run, agent_cfg.load_checkpoint)
print(f"[INFO]: Loading model checkpoint from: {resume_path}") print(f"[INFO]: Loading model checkpoint from: {resume_path}")
# load previously trained model # load previously trained model
runner.load(resume_path) runner.load(resume_path)
# set seed of the environment # set seed of the environment
env.seed(agent_cfg["seed"]) env.seed(agent_cfg.seed)
# dump the configuration into log-directory
dump_yaml(os.path.join(log_dir, "params", "env.yaml"), env_cfg)
dump_yaml(os.path.join(log_dir, "params", "agent.yaml"), agent_cfg)
dump_pickle(os.path.join(log_dir, "params", "env.pkl"), env_cfg)
dump_pickle(os.path.join(log_dir, "params", "agent.pkl"), agent_cfg)
# run training # run training
runner.learn(num_learning_iterations=agent_cfg["runner"]["max_iterations"], init_at_random_ep_len=True) runner.learn(num_learning_iterations=agent_cfg.max_iterations, init_at_random_ep_len=True)
# close the simulator # close the simulator
env.close() env.close()
......
# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES, ETH Zurich, and University of Toronto
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
"""Utility functions for parsing sb3 configuration files."""
from __future__ import annotations
import os
import yaml
from torch import nn as nn # noqa: F401
from omni.isaac.orbit_envs import ORBIT_ENVS_DATA_DIR
__all__ = ["SB3_PPO_CONFIG_FILE", "parse_sb3_cfg"]
SB3_PPO_CONFIG_FILE = {
# classic
"Isaac-Cartpole-v0": os.path.join(ORBIT_ENVS_DATA_DIR, "sb3/cartpole_ppo.yaml"),
"Isaac-Ant-v0": os.path.join(ORBIT_ENVS_DATA_DIR, "sb3/ant_ppo.yaml"),
"Isaac-Humanoid-v0": os.path.join(ORBIT_ENVS_DATA_DIR, "sb3/humanoid_ppo.yaml"),
# manipulation
"Isaac-Reach-Franka-v0": os.path.join(ORBIT_ENVS_DATA_DIR, "sb3/reach_ppo.yaml"),
}
"""Mapping from environment names to PPO agent files."""
def parse_sb3_cfg(task_name) -> dict:
"""Parse configuration for Stable-baselines3 agent based on inputs.
Args:
task_name: The name of the environment.
Returns:
A dictionary containing the parsed configuration.
"""
# retrieve the default environment config file
try:
config_file = SB3_PPO_CONFIG_FILE[task_name]
except KeyError:
raise ValueError(f"Task not found: {task_name}. Configurations exist for {SB3_PPO_CONFIG_FILE.keys()}")
# parse agent configuration
with open(config_file, encoding="utf-8") as f:
cfg = yaml.load(f, Loader=yaml.FullLoader)
# check config is valid
if cfg is None:
raise ValueError(f"Config file is empty: {config_file}")
# post-process certain arguments
# reference: https://github.com/DLR-RM/rl-baselines3-zoo/blob/0e5eb145faefa33e7d79c7f8c179788574b20da5/utils/exp_manager.py#L358
for kwargs_key in ["policy_kwargs", "replay_buffer_class", "replay_buffer_kwargs"]:
if kwargs_key in cfg and isinstance(cfg[kwargs_key], str):
cfg[kwargs_key] = eval(cfg[kwargs_key])
return cfg
...@@ -43,22 +43,22 @@ from stable_baselines3.common.vec_env import VecNormalize ...@@ -43,22 +43,22 @@ from stable_baselines3.common.vec_env import VecNormalize
import omni.isaac.contrib_envs # noqa: F401 import omni.isaac.contrib_envs # noqa: F401
import omni.isaac.orbit_envs # noqa: F401 import omni.isaac.orbit_envs # noqa: F401
from omni.isaac.orbit_envs.utils.parse_cfg import parse_env_cfg from omni.isaac.orbit_envs.utils.parse_cfg import load_cfg_from_registry, parse_env_cfg
from omni.isaac.orbit_envs.utils.wrappers.sb3 import Sb3VecEnvWrapper from omni.isaac.orbit_envs.utils.wrappers.sb3 import Sb3VecEnvWrapper, process_sb3_cfg
from config import parse_sb3_cfg
def main(): def main():
"""Play with stable-baselines agent.""" """Play with stable-baselines agent."""
# parse configuration # parse configuration
env_cfg = parse_env_cfg(args_cli.task, use_gpu=not args_cli.cpu, num_envs=args_cli.num_envs) env_cfg = parse_env_cfg(args_cli.task, use_gpu=not args_cli.cpu, num_envs=args_cli.num_envs)
agent_cfg = load_cfg_from_registry(args_cli.task, "sb3_cfg_entry_point")
# post-process agent configuration
agent_cfg = process_sb3_cfg(agent_cfg)
# create isaac environment # create isaac environment
env = gym.make(args_cli.task, cfg=env_cfg) env = gym.make(args_cli.task, cfg=env_cfg)
# wrap around environment for stable baselines # wrap around environment for stable baselines
env = Sb3VecEnvWrapper(env) env = Sb3VecEnvWrapper(env)
# parse agent configuration
agent_cfg = parse_sb3_cfg(args_cli.task)
# normalize environment (if needed) # normalize environment (if needed)
if "normalize_input" in agent_cfg: if "normalize_input" in agent_cfg:
......
...@@ -59,23 +59,24 @@ from omni.isaac.orbit.utils.io import dump_pickle, dump_yaml ...@@ -59,23 +59,24 @@ from omni.isaac.orbit.utils.io import dump_pickle, dump_yaml
import omni.isaac.contrib_envs # noqa: F401 import omni.isaac.contrib_envs # noqa: F401
import omni.isaac.orbit_envs # noqa: F401 import omni.isaac.orbit_envs # noqa: F401
from omni.isaac.orbit_envs.utils import parse_env_cfg from omni.isaac.orbit_envs.utils import load_cfg_from_registry, parse_env_cfg
from omni.isaac.orbit_envs.utils.wrappers.sb3 import Sb3VecEnvWrapper from omni.isaac.orbit_envs.utils.wrappers.sb3 import Sb3VecEnvWrapper, process_sb3_cfg
from config import parse_sb3_cfg
def main(): def main():
"""Train with stable-baselines agent.""" """Train with stable-baselines agent."""
# parse configuration # parse configuration
env_cfg = parse_env_cfg(args_cli.task, use_gpu=not args_cli.cpu, num_envs=args_cli.num_envs) env_cfg = parse_env_cfg(args_cli.task, use_gpu=not args_cli.cpu, num_envs=args_cli.num_envs)
agent_cfg = parse_sb3_cfg(args_cli.task) agent_cfg = load_cfg_from_registry(args_cli.task, "sb3_cfg_entry_point")
# post-process agent configuration
agent_cfg = process_sb3_cfg(agent_cfg)
# override configuration with command line arguments # override configuration with command line arguments
if args_cli.seed is not None: if args_cli.seed is not None:
agent_cfg["seed"] = args_cli.seed agent_cfg["seed"] = args_cli.seed
# directory for logging into # directory for logging into
log_dir = os.path.join("logs", "sb3", args_cli.task, datetime.now().strftime("%b%d_%H-%M-%S")) log_dir = os.path.join("logs", "sb3", args_cli.task, datetime.now().strftime("%Y-%m-%d_%H-%M-%S"))
# dump the configuration into log-directory # dump the configuration into log-directory
dump_yaml(os.path.join(log_dir, "params", "env.yaml"), env_cfg) dump_yaml(os.path.join(log_dir, "params", "env.yaml"), env_cfg)
dump_yaml(os.path.join(log_dir, "params", "agent.yaml"), agent_cfg) dump_yaml(os.path.join(log_dir, "params", "agent.yaml"), agent_cfg)
......
# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES, ETH Zurich, and University of Toronto
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
"""Utility functions for parsing skrl configuration files."""
from __future__ import annotations
import os
import yaml
from skrl.resources.preprocessors.torch import RunningStandardScaler # noqa: F401
from skrl.resources.schedulers.torch import KLAdaptiveRL # noqa: F401
from skrl.utils.model_instantiators import Shape # noqa: F401
from omni.isaac.orbit_envs import ORBIT_ENVS_DATA_DIR
__all__ = ["SKRL_PPO_CONFIG_FILE", "parse_skrl_cfg"]
SKRL_PPO_CONFIG_FILE = {
# classic
"Isaac-Cartpole-v0": os.path.join(ORBIT_ENVS_DATA_DIR, "skrl/cartpole_ppo.yaml"),
"Isaac-Ant-v0": os.path.join(ORBIT_ENVS_DATA_DIR, "skrl/ant_ppo.yaml"),
"Isaac-Humanoid-v0": os.path.join(ORBIT_ENVS_DATA_DIR, "skrl/humanoid_ppo.yaml"),
# manipulation
"Isaac-Lift-Franka-v0": os.path.join(ORBIT_ENVS_DATA_DIR, "skrl/lift_ppo.yaml"),
"Isaac-Reach-Franka-v0": os.path.join(ORBIT_ENVS_DATA_DIR, "skrl/reach_ppo.yaml"),
# locomotion
"Isaac-Velocity-Anymal-C-v0": os.path.join(ORBIT_ENVS_DATA_DIR, "skrl/anymal_ppo.yaml"),
}
"""Mapping from environment names to PPO agent files."""
def parse_skrl_cfg(task_name) -> dict:
"""Parse configuration based on command line arguments.
Args:
task_name: The name of the environment.
Returns:
A dictionary containing the parsed configuration.
"""
# retrieve the default environment config file
try:
config_file = SKRL_PPO_CONFIG_FILE[task_name]
except KeyError:
raise ValueError(f"Task not found: {task_name}")
# parse agent configuration
with open(config_file, encoding="utf-8") as f:
cfg = yaml.load(f, Loader=yaml.Loader)
return cfg
def convert_skrl_cfg(cfg):
"""Convert simple YAML types to skrl classes/components.
Args:
cfg: configuration dictionary.
Returns:
A dictionary containing the converted configuration.
"""
_direct_eval = [
"learning_rate_scheduler",
"state_preprocessor",
"value_preprocessor",
"input_shape",
"output_shape",
]
def reward_shaper_function(scale):
def reward_shaper(rewards, timestep, timesteps):
return rewards * scale
return reward_shaper
def update_dict(d):
for key, value in d.items():
if isinstance(value, dict):
update_dict(value)
else:
if key in _direct_eval:
d[key] = eval(value)
elif key.endswith("_kwargs"):
d[key] = value if value is not None else {}
elif key in ["rewards_shaper_scale"]:
d["rewards_shaper"] = reward_shaper_function(value)
return d
# parse agent configuration and convert to classes
return update_dict(cfg)
...@@ -48,17 +48,15 @@ from skrl.utils.model_instantiators import deterministic_model, gaussian_model, ...@@ -48,17 +48,15 @@ from skrl.utils.model_instantiators import deterministic_model, gaussian_model,
import omni.isaac.contrib_envs # noqa: F401 import omni.isaac.contrib_envs # noqa: F401
import omni.isaac.orbit_envs # noqa: F401 import omni.isaac.orbit_envs # noqa: F401
from omni.isaac.orbit_envs.utils import get_checkpoint_path, parse_env_cfg from omni.isaac.orbit_envs.utils import get_checkpoint_path, load_cfg_from_registry, parse_env_cfg
from omni.isaac.orbit_envs.utils.wrappers.skrl import SkrlVecEnvWrapper from omni.isaac.orbit_envs.utils.wrappers.skrl import SkrlVecEnvWrapper, process_skrl_cfg
from config import convert_skrl_cfg, parse_skrl_cfg
def main(): def main():
"""Play with skrl agent.""" """Play with skrl agent."""
# parse env configuration # parse env configuration
env_cfg = parse_env_cfg(args_cli.task, use_gpu=not args_cli.cpu, num_envs=args_cli.num_envs) env_cfg = parse_env_cfg(args_cli.task, use_gpu=not args_cli.cpu, num_envs=args_cli.num_envs)
experiment_cfg = parse_skrl_cfg(args_cli.task) experiment_cfg = load_cfg_from_registry(args_cli.task, "skrl_cfg_entry_point")
# create isaac environment # create isaac environment
env = gym.make(args_cli.task, cfg=env_cfg) env = gym.make(args_cli.task, cfg=env_cfg)
...@@ -74,13 +72,13 @@ def main(): ...@@ -74,13 +72,13 @@ def main():
observation_space=env.observation_space, observation_space=env.observation_space,
action_space=env.action_space, action_space=env.action_space,
device=env.device, device=env.device,
**convert_skrl_cfg(experiment_cfg["models"]["policy"]), **process_skrl_cfg(experiment_cfg["models"]["policy"]),
) )
models["value"] = deterministic_model( models["value"] = deterministic_model(
observation_space=env.observation_space, observation_space=env.observation_space,
action_space=env.action_space, action_space=env.action_space,
device=env.device, device=env.device,
**convert_skrl_cfg(experiment_cfg["models"]["value"]), **process_skrl_cfg(experiment_cfg["models"]["value"]),
) )
# shared models # shared models
else: else:
...@@ -91,8 +89,8 @@ def main(): ...@@ -91,8 +89,8 @@ def main():
structure=None, structure=None,
roles=["policy", "value"], roles=["policy", "value"],
parameters=[ parameters=[
convert_skrl_cfg(experiment_cfg["models"]["policy"]), process_skrl_cfg(experiment_cfg["models"]["policy"]),
convert_skrl_cfg(experiment_cfg["models"]["value"]), process_skrl_cfg(experiment_cfg["models"]["value"]),
], ],
) )
models["value"] = models["policy"] models["value"] = models["policy"]
...@@ -101,7 +99,7 @@ def main(): ...@@ -101,7 +99,7 @@ def main():
# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ppo.html # https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ppo.html
agent_cfg = PPO_DEFAULT_CONFIG.copy() agent_cfg = PPO_DEFAULT_CONFIG.copy()
experiment_cfg["agent"]["rewards_shaper"] = None # avoid 'dictionary changed size during iteration' experiment_cfg["agent"]["rewards_shaper"] = None # avoid 'dictionary changed size during iteration'
agent_cfg.update(convert_skrl_cfg(experiment_cfg["agent"])) agent_cfg.update(process_skrl_cfg(experiment_cfg["agent"]))
agent_cfg["state_preprocessor_kwargs"].update({"size": env.observation_space, "device": env.device}) agent_cfg["state_preprocessor_kwargs"].update({"size": env.observation_space, "device": env.device})
agent_cfg["value_preprocessor_kwargs"].update({"size": 1, "device": env.device}) agent_cfg["value_preprocessor_kwargs"].update({"size": 1, "device": env.device})
......
...@@ -63,10 +63,8 @@ from omni.isaac.orbit.utils.io import dump_pickle, dump_yaml ...@@ -63,10 +63,8 @@ from omni.isaac.orbit.utils.io import dump_pickle, dump_yaml
import omni.isaac.contrib_envs # noqa: F401 import omni.isaac.contrib_envs # noqa: F401
import omni.isaac.orbit_envs # noqa: F401 import omni.isaac.orbit_envs # noqa: F401
from omni.isaac.orbit_envs.utils import parse_env_cfg from omni.isaac.orbit_envs.utils import load_cfg_from_registry, parse_env_cfg
from omni.isaac.orbit_envs.utils.wrappers.skrl import SkrlSequentialLogTrainer, SkrlVecEnvWrapper from omni.isaac.orbit_envs.utils.wrappers.skrl import SkrlSequentialLogTrainer, SkrlVecEnvWrapper, process_skrl_cfg
from config import convert_skrl_cfg, parse_skrl_cfg
def main(): def main():
...@@ -76,14 +74,14 @@ def main(): ...@@ -76,14 +74,14 @@ def main():
# parse configuration # parse configuration
env_cfg = parse_env_cfg(args_cli.task, use_gpu=not args_cli.cpu, num_envs=args_cli.num_envs) env_cfg = parse_env_cfg(args_cli.task, use_gpu=not args_cli.cpu, num_envs=args_cli.num_envs)
experiment_cfg = parse_skrl_cfg(args_cli.task) experiment_cfg = load_cfg_from_registry(args_cli.task, "skrl_cfg_entry_point")
# specify directory for logging experiments # specify directory for logging experiments
log_root_path = os.path.join("logs", "skrl", experiment_cfg["agent"]["experiment"]["directory"]) log_root_path = os.path.join("logs", "skrl", experiment_cfg["agent"]["experiment"]["directory"])
log_root_path = os.path.abspath(log_root_path) log_root_path = os.path.abspath(log_root_path)
print(f"[INFO] Logging experiment in directory: {log_root_path}") print(f"[INFO] Logging experiment in directory: {log_root_path}")
# specify directory for logging runs # specify directory for logging runs
log_dir = datetime.now().strftime("%b%d_%H-%M-%S") log_dir = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
if experiment_cfg["agent"]["experiment"]["experiment_name"]: if experiment_cfg["agent"]["experiment"]["experiment_name"]:
log_dir += f'_{experiment_cfg["agent"]["experiment"]["experiment_name"]}' log_dir += f'_{experiment_cfg["agent"]["experiment"]["experiment_name"]}'
# set directory into agent config # set directory into agent config
...@@ -125,13 +123,13 @@ def main(): ...@@ -125,13 +123,13 @@ def main():
observation_space=env.observation_space, observation_space=env.observation_space,
action_space=env.action_space, action_space=env.action_space,
device=env.device, device=env.device,
**convert_skrl_cfg(experiment_cfg["models"]["policy"]), **process_skrl_cfg(experiment_cfg["models"]["policy"]),
) )
models["value"] = deterministic_model( models["value"] = deterministic_model(
observation_space=env.observation_space, observation_space=env.observation_space,
action_space=env.action_space, action_space=env.action_space,
device=env.device, device=env.device,
**convert_skrl_cfg(experiment_cfg["models"]["value"]), **process_skrl_cfg(experiment_cfg["models"]["value"]),
) )
# shared models # shared models
else: else:
...@@ -142,8 +140,8 @@ def main(): ...@@ -142,8 +140,8 @@ def main():
structure=None, structure=None,
roles=["policy", "value"], roles=["policy", "value"],
parameters=[ parameters=[
convert_skrl_cfg(experiment_cfg["models"]["policy"]), process_skrl_cfg(experiment_cfg["models"]["policy"]),
convert_skrl_cfg(experiment_cfg["models"]["value"]), process_skrl_cfg(experiment_cfg["models"]["value"]),
], ],
) )
models["value"] = models["policy"] models["value"] = models["policy"]
...@@ -157,7 +155,7 @@ def main(): ...@@ -157,7 +155,7 @@ def main():
# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ppo.html # https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ppo.html
agent_cfg = PPO_DEFAULT_CONFIG.copy() agent_cfg = PPO_DEFAULT_CONFIG.copy()
experiment_cfg["agent"]["rewards_shaper"] = None # avoid 'dictionary changed size during iteration' experiment_cfg["agent"]["rewards_shaper"] = None # avoid 'dictionary changed size during iteration'
agent_cfg.update(convert_skrl_cfg(experiment_cfg["agent"])) agent_cfg.update(process_skrl_cfg(experiment_cfg["agent"]))
agent_cfg["state_preprocessor_kwargs"].update({"size": env.observation_space, "device": env.device}) agent_cfg["state_preprocessor_kwargs"].update({"size": env.observation_space, "device": env.device})
agent_cfg["value_preprocessor_kwargs"].update({"size": 1, "device": env.device}) agent_cfg["value_preprocessor_kwargs"].update({"size": 1, "device": env.device})
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment