Commit 6bdf2ba8 authored by Toni-SM, committed by Mayank Mittal

Updates the workflow integration for skrl 1.1.0 (#249)

Updates the workflow integration for `skrl>=1.1.0`.

- Bug fix (non-breaking change which fixes an issue)

- [x] I have run the [`pre-commit` checks](https://pre-commit.com/) with
`./orbit.sh --format`
- [ ] I have made corresponding changes to the documentation
- [x] My changes generate no new warnings
- [ ] I have added tests that prove my fix is effective or that my
feature works
- [x] I have run all the tests with `./orbit.sh --test` and they pass
- [x] I have updated the changelog and the corresponding version in the
extension's `config/extension.toml` file
- [x] I have added my name to the `CONTRIBUTORS.md` or my name already
exists there
parent 59e6c680
[package]
# Note: Semantic Versioning is used: https://semver.org/
version = "0.5.6"
version = "0.5.7"
# Description
title = "ORBIT Environments"
......
Changelog
---------
0.5.7 (2024-02-28)
~~~~~~~~~~~~~~~~~~
Fixed
^^^^^
* Updated the RL wrapper for the skrl library to the latest release (>= 1.1.0)
0.5.6 (2024-02-21)
~~~~~~~~~~~~~~~~~~
......
......@@ -7,6 +7,7 @@ models:
policy: # see skrl.utils.model_instantiators.gaussian_model for parameter details
clip_actions: True
clip_log_std: True
initial_log_std: 0
min_log_std: -20.0
max_log_std: 2.0
input_shape: "Shape.STATES"
......@@ -34,7 +35,7 @@ agent:
discount_factor: 0.99
lambda: 0.95
learning_rate: 3.e-4
learning_rate_scheduler: "KLAdaptiveRL"
learning_rate_scheduler: "KLAdaptiveLR"
learning_rate_scheduler_kwargs:
kl_threshold: 0.008
state_preprocessor: "RunningStandardScaler"
......
......@@ -7,6 +7,7 @@ models:
policy: # see skrl.utils.model_instantiators.gaussian_model for parameter details
clip_actions: True
clip_log_std: True
initial_log_std: 0
min_log_std: -20.0
max_log_std: 2.0
input_shape: "Shape.STATES"
......@@ -29,14 +30,14 @@ models:
# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ppo.html
agent:
rollouts: 16
learning_epochs: 8
mini_batches: 1
learning_epochs: 5
mini_batches: 4
discount_factor: 0.99
lambda: 0.95
learning_rate: 3.e-4
learning_rate_scheduler: "KLAdaptiveRL"
learning_rate: 1.e-3
learning_rate_scheduler: "KLAdaptiveLR"
learning_rate_scheduler_kwargs:
kl_threshold: 0.008
kl_threshold: 0.01
state_preprocessor: "RunningStandardScaler"
state_preprocessor_kwargs: null
value_preprocessor: "RunningStandardScaler"
......@@ -55,11 +56,11 @@ agent:
experiment:
directory: "cartpole"
experiment_name: ""
write_interval: 16
checkpoint_interval: 80
write_interval: 12
checkpoint_interval: 120
# Sequential trainer
# https://skrl.readthedocs.io/en/latest/modules/skrl.trainers.sequential.html
trainer:
timesteps: 1600
timesteps: 2400
......@@ -7,6 +7,7 @@ models:
policy: # see skrl.utils.model_instantiators.gaussian_model for parameter details
clip_actions: True
clip_log_std: True
initial_log_std: 0
min_log_std: -20.0
max_log_std: 2.0
input_shape: "Shape.STATES"
......@@ -34,7 +35,7 @@ agent:
discount_factor: 0.99
lambda: 0.95
learning_rate: 3.e-4
learning_rate_scheduler: "KLAdaptiveRL"
learning_rate_scheduler: "KLAdaptiveLR"
learning_rate_scheduler_kwargs:
kl_threshold: 0.008
state_preprocessor: "RunningStandardScaler"
......
......@@ -7,6 +7,7 @@ models:
policy: # see skrl.utils.model_instantiators.gaussian_model for parameter details
clip_actions: False
clip_log_std: True
initial_log_std: 0
min_log_std: -20.0
max_log_std: 2.0
input_shape: "Shape.STATES"
......@@ -34,7 +35,7 @@ agent:
discount_factor: 0.99
lambda: 0.95
learning_rate: 1.e-3
learning_rate_scheduler: "KLAdaptiveRL"
learning_rate_scheduler: "KLAdaptiveLR"
learning_rate_scheduler_kwargs:
kl_threshold: 0.01
state_preprocessor: "RunningStandardScaler"
......
......@@ -22,6 +22,7 @@ gym.register(
kwargs={
"env_cfg_entry_point": joint_pos_env_cfg.FrankaCubeLiftEnvCfg,
"rsl_rl_cfg_entry_point": agents.rsl_rl_cfg.LiftCubePPORunnerCfg,
"skrl_cfg_entry_point": f"{agents.__name__}:skrl_ppo_cfg.yaml",
},
disable_env_checker=True,
)
......@@ -32,6 +33,7 @@ gym.register(
kwargs={
"env_cfg_entry_point": joint_pos_env_cfg.FrankaCubeLiftEnvCfg_PLAY,
"rsl_rl_cfg_entry_point": agents.rsl_rl_cfg.LiftCubePPORunnerCfg,
"skrl_cfg_entry_point": f"{agents.__name__}:skrl_ppo_cfg.yaml",
},
disable_env_checker=True,
)
......@@ -46,6 +48,7 @@ gym.register(
kwargs={
"env_cfg_entry_point": ik_abs_env_cfg.FrankaCubeLiftEnvCfg,
"rsl_rl_cfg_entry_point": agents.rsl_rl_cfg.LiftCubePPORunnerCfg,
"skrl_cfg_entry_point": f"{agents.__name__}:skrl_ppo_cfg.yaml",
},
disable_env_checker=True,
)
......@@ -56,6 +59,7 @@ gym.register(
kwargs={
"env_cfg_entry_point": ik_abs_env_cfg.FrankaCubeLiftEnvCfg_PLAY,
"rsl_rl_cfg_entry_point": agents.rsl_rl_cfg.LiftCubePPORunnerCfg,
"skrl_cfg_entry_point": f"{agents.__name__}:skrl_ppo_cfg.yaml",
},
disable_env_checker=True,
)
......@@ -70,6 +74,7 @@ gym.register(
kwargs={
"env_cfg_entry_point": ik_rel_env_cfg.FrankaCubeLiftEnvCfg,
"rsl_rl_cfg_entry_point": agents.rsl_rl_cfg.LiftCubePPORunnerCfg,
"skrl_cfg_entry_point": f"{agents.__name__}:skrl_ppo_cfg.yaml",
},
disable_env_checker=True,
)
......@@ -80,6 +85,7 @@ gym.register(
kwargs={
"env_cfg_entry_point": ik_rel_env_cfg.FrankaCubeLiftEnvCfg_PLAY,
"rsl_rl_cfg_entry_point": agents.rsl_rl_cfg.LiftCubePPORunnerCfg,
"skrl_cfg_entry_point": f"{agents.__name__}:skrl_ppo_cfg.yaml",
},
disable_env_checker=True,
)
......@@ -7,6 +7,7 @@ models:
policy: # see skrl.utils.model_instantiators.gaussian_model for parameter details
clip_actions: False
clip_log_std: True
initial_log_std: 0
min_log_std: -20.0
max_log_std: 2.0
input_shape: "Shape.STATES"
......@@ -34,7 +35,7 @@ agent:
discount_factor: 0.99
lambda: 0.95
learning_rate: 3.e-4
learning_rate_scheduler: "KLAdaptiveRL"
learning_rate_scheduler: "KLAdaptiveLR"
learning_rate_scheduler_kwargs:
kl_threshold: 0.008
state_preprocessor: "RunningStandardScaler"
......@@ -53,7 +54,7 @@ agent:
rewards_shaper_scale: 0.01
# logging and checkpoint
experiment:
directory: "lift"
directory: "franka_lift"
experiment_name: ""
write_interval: 120
checkpoint_interval: 1200
......
......@@ -19,6 +19,7 @@ gym.register(
"env_cfg_entry_point": env_cfg.FrankaReachEnvCfg,
"rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_ppo_cfg.yaml",
"rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_cfg:FrankaReachPPORunnerCfg",
"skrl_cfg_entry_point": f"{agents.__name__}:skrl_ppo_cfg.yaml",
},
)
......@@ -30,5 +31,6 @@ gym.register(
"env_cfg_entry_point": env_cfg.FrankaReachEnvCfg_PLAY,
"rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_ppo_cfg.yaml",
"rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_cfg:FrankaReachPPORunnerCfg",
"skrl_cfg_entry_point": f"{agents.__name__}:skrl_ppo_cfg.yaml",
},
)
# Copyright (c) 2022-2024, The ORBIT Project Developers.
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
seed: 42
# Models are instantiated using skrl's model instantiator utility
# https://skrl.readthedocs.io/en/develop/modules/skrl.utils.model_instantiators.html
models:
separate: False
policy: # see skrl.utils.model_instantiators.torch.gaussian_model for parameter details
clip_actions: False
clip_log_std: True
initial_log_std: 0
min_log_std: -20.0
max_log_std: 2.0
input_shape: "Shape.STATES"
hiddens: [64, 64]
hidden_activation: ["elu", "elu"]
output_shape: "Shape.ACTIONS"
output_activation: ""
output_scale: 1.0
value: # see skrl.utils.model_instantiators.torch.deterministic_model for parameter details
clip_actions: False
input_shape: "Shape.STATES"
hiddens: [64, 64]
hidden_activation: ["elu", "elu"]
output_shape: "Shape.ONE"
output_activation: ""
output_scale: 1.0
# PPO agent configuration (field names are from PPO_DEFAULT_CONFIG)
# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ppo.html
agent:
rollouts: 24
learning_epochs: 8
mini_batches: 4
discount_factor: 0.99
lambda: 0.95
learning_rate: 1.e-3
learning_rate_scheduler: "KLAdaptiveLR"
learning_rate_scheduler_kwargs:
kl_threshold: 0.01
state_preprocessor: "RunningStandardScaler"
state_preprocessor_kwargs: null
value_preprocessor: "RunningStandardScaler"
value_preprocessor_kwargs: null
random_timesteps: 0
learning_starts: 0
grad_norm_clip: 1.0
ratio_clip: 0.2
value_clip: 0.2
clip_predicted_values: True
entropy_loss_scale: 0.0
value_loss_scale: 2.0
kl_threshold: 0
rewards_shaper_scale: 0.01
# logging and checkpoint
experiment:
directory: "franka_reach"
experiment_name: ""
write_interval: 120
checkpoint_interval: 1200
# Sequential trainer
# https://skrl.readthedocs.io/en/latest/modules/skrl.trainers.sequential.html
trainer:
timesteps: 24000
......@@ -30,12 +30,12 @@ import torch
import tqdm
from skrl.agents.torch import Agent
from skrl.envs.torch.wrappers import Wrapper, wrap_env
from skrl.envs.wrappers.torch import Wrapper, wrap_env
from skrl.resources.preprocessors.torch import RunningStandardScaler # noqa: F401
from skrl.resources.schedulers.torch import KLAdaptiveRL # noqa: F401
from skrl.resources.schedulers.torch import KLAdaptiveLR # noqa: F401
from skrl.trainers.torch import Trainer
from skrl.trainers.torch.sequential import SEQUENTIAL_TRAINER_DEFAULT_CONFIG
from skrl.utils.model_instantiators import Shape # noqa: F401
from skrl.utils.model_instantiators.torch import Shape # noqa: F401
from omni.isaac.orbit.envs import RLTaskEnv
......@@ -160,7 +160,7 @@ class SkrlSequentialLogTrainer(Trainer):
# initialize the base class
super().__init__(env=env, agents=agents, agents_scope=agents_scope, cfg=_cfg)
# init agents
if self.num_agents > 1:
if self.env.num_agents > 1:
for agent in self.agents:
agent.init(trainer_cfg=self.cfg)
else:
......@@ -271,10 +271,10 @@ class SkrlSequentialLogTrainer(Trainer):
timesteps=self.timesteps,
)
# log custom environment data
if "episode" in infos:
for k, v in infos["episode"].items():
if "log" in infos:
for k, v in infos["log"].items():
if isinstance(v, torch.Tensor) and v.numel() == 1:
agent.track_data(f"EpisodeInfo / {k}", v.item())
agent.track_data(k, v.item())
# perform post-interaction
super(type(agent), agent).post_interaction(timestep=timestep, timesteps=self.timesteps)
......
......@@ -34,7 +34,7 @@ INSTALL_REQUIRES = [
# Extra dependencies for RL agents
EXTRAS_REQUIRE = {
"sb3": ["stable-baselines3>=2.0"],
"skrl": ["skrl==0.10.0"],
"skrl": ["skrl>=1.1.0"],
"rl_games": ["rl-games==1.6.1", "gym"], # rl-games still needs gym :(
"rsl_rl": ["rsl_rl@git+https://github.com/leggedrobotics/rsl_rl.git"],
"robomimic": ["robomimic@git+https://github.com/ARISE-Initiative/robomimic.git"],
......
......@@ -34,7 +34,6 @@ args_cli = parser.parse_args()
# launch omniverse app
app_launcher = AppLauncher(args_cli)
simulation_app = app_launcher.app
"""Rest everything follows."""
......@@ -44,9 +43,8 @@ import traceback
import carb
from skrl.agents.torch.ppo import PPO, PPO_DEFAULT_CONFIG
from skrl.utils.model_instantiators import deterministic_model, gaussian_model, shared_model
from skrl.utils.model_instantiators.torch import deterministic_model, gaussian_model, shared_model
import omni.isaac.contrib_tasks # noqa: F401
import omni.isaac.orbit_tasks # noqa: F401
from omni.isaac.orbit_tasks.utils import get_checkpoint_path, load_cfg_from_registry, parse_env_cfg
from omni.isaac.orbit_tasks.utils.wrappers.skrl import SkrlVecEnvWrapper, process_skrl_cfg
......
......@@ -40,11 +40,9 @@ if args_cli.headless:
app_experience = f"{os.environ['EXP_PATH']}/omni.isaac.sim.python.gym.headless.kit"
else:
app_experience = f"{os.environ['EXP_PATH']}/omni.isaac.sim.python.kit"
# launch omniverse app
app_launcher = AppLauncher(args_cli, experience=app_experience)
simulation_app = app_launcher.app
"""Rest everything follows."""
......@@ -56,12 +54,11 @@ import carb
from skrl.agents.torch.ppo import PPO, PPO_DEFAULT_CONFIG
from skrl.memories.torch import RandomMemory
from skrl.utils import set_seed
from skrl.utils.model_instantiators import deterministic_model, gaussian_model, shared_model
from skrl.utils.model_instantiators.torch import deterministic_model, gaussian_model, shared_model
from omni.isaac.orbit.utils.dict import print_dict
from omni.isaac.orbit.utils.io import dump_pickle, dump_yaml
import omni.isaac.contrib_tasks # noqa: F401
import omni.isaac.orbit_tasks # noqa: F401
from omni.isaac.orbit_tasks.utils import load_cfg_from_registry, parse_env_cfg
from omni.isaac.orbit_tasks.utils.wrappers.skrl import SkrlSequentialLogTrainer, SkrlVecEnvWrapper, process_skrl_cfg
......@@ -80,7 +77,7 @@ def main():
log_root_path = os.path.join("logs", "skrl", experiment_cfg["agent"]["experiment"]["directory"])
log_root_path = os.path.abspath(log_root_path)
print(f"[INFO] Logging experiment in directory: {log_root_path}")
# specify directory for logging runs
# specify directory for logging runs: {time-stamp}_{run_name}
log_dir = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
if experiment_cfg["agent"]["experiment"]["experiment_name"]:
log_dir += f'_{experiment_cfg["agent"]["experiment"]["experiment_name"]}'
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment