Commit e5493b74 authored by Toni-SM's avatar Toni-SM Committed by David Hoeller

Add the direct workflow shadow hand over multi-agent environment (#105)

# Description

This PR adds the shadow hand over multi-agent direct-workflow task
(`Isaac-Shadow-Hand-Over-Direct-v0`)

## Type of change

<!-- As you go through the list, delete the ones that are not
applicable. -->

- New feature (non-breaking change which adds functionality)
- This change requires a documentation update
## Checklist

- [x] I have run the [`pre-commit` checks](https://pre-commit.com/) with
`./isaaclab.sh --format`
- [x] I have made corresponding changes to the documentation
- [x] My changes generate no new warnings
- [x] I have added tests that prove my fix is effective or that my
feature works
- [x] I have updated the changelog and the corresponding version in the
extension's `config/extension.toml` file
- [x] I have added my name to the `CONTRIBUTORS.md` or my name already
exists there

<!--
As you go through the checklist above, you can mark something as done by
putting an x character in it

For example,
- [x] I have done this task
- [ ] I have not done this task
-->

---------
Signed-off-by: 's avatarToni-SM <toni.semu@gmail.com>
Co-authored-by: 's avatarKelly Guo <kellyg@nvidia.com>
Co-authored-by: 's avatarAlexander <143108850+nv-apoddubny@users.noreply.github.com>
Co-authored-by: 's avatarAlexander Poddubny <apoddubny@nvidia.com>
Co-authored-by: 's avatarKelly Guo <kellyguo123@hotmail.com>
parent e851048f
......@@ -259,7 +259,25 @@ Classic
.. |cart-double-pendulum-direct-link| replace:: `Isaac-Cart-Double-Pendulum-Direct-v0 <https://github.com/isaac-sim/IsaacLab/blob/main/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/cart_double_pendulum/cart_double_pendulum_env.py>`__
Manipulation
~~~~~~~~~~~~
Environments based on fixed-arm manipulation tasks.
.. table::
:widths: 33 37 30
+----------------------+--------------------------------+--------------------------------------------------------+
| World | Environment ID | Description |
+======================+================================+========================================================+
| |shadow-hand-over| | |shadow-hand-over-direct-link| | Passing an object from one hand over to the other hand |
+----------------------+--------------------------------+--------------------------------------------------------+
.. |shadow-hand-over| image:: ../_static/tasks/manipulation/shadow_hand_over.jpg
.. |shadow-hand-over-direct-link| replace:: `Isaac-Shadow-Hand-Over-Direct-v0 <https://github.com/isaac-sim/IsaacLab/blob/main/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/shadow_hand_over/shadow_hand_over_env.py>`__
|
Comprehensive List of Environments
==================================
......@@ -298,6 +316,8 @@ Comprehensive List of Environments
+------------------------------------------------+--------------------------------------------+---------------+-----------------------------+
| Isaac-Quadcopter-Direct-v0 | | Direct | rsl_rl, rl_games, skrl |
+------------------------------------------------+--------------------------------------------+---------------+-----------------------------+
| Isaac-Shadow-Hand-Over-Direct-v0 | | Direct | rsl_rl, rl_games, skrl |
+------------------------------------------------+--------------------------------------------+---------------+-----------------------------+
| Isaac-Humanoid-v0 | | Manager Based | rsl_rl, rl_games, skrl, sb3 |
+------------------------------------------------+--------------------------------------------+---------------+-----------------------------+
| Isaac-Ant-v0 | | Manager Based | rsl_rl, rl_games, skrl, sb3 |
......
[package]
# Note: Semantic Versioning is used: https://semver.org/
version = "0.10.1"
version = "0.10.2"
# Description
title = "Isaac Lab Environments"
......
Changelog
---------
0.10.2 (2024-08-23)
~~~~~~~~~~~~~~~~~~~
Added
^^^^^
* Added ``Isaac-Shadow-Hand-Over-Direct-v0`` multi-agent environment
0.10.1 (2024-08-21)
~~~~~~~~~~~~~~~~~~~
......
# Copyright (c) 2022-2024, The Isaac Lab Project Developers.
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
"""
ShadowHand Over environment.
"""
import gymnasium as gym
from . import agents
from .shadow_hand_over_env import ShadowHandOverEnv
from .shadow_hand_over_env_cfg import ShadowHandOverEnvCfg
##
# Register Gym environments.
##
gym.register(
id="Isaac-Shadow-Hand-Over-Direct-v0",
entry_point="omni.isaac.lab_tasks.direct.shadow_hand_over:ShadowHandOverEnv",
disable_env_checker=True,
kwargs={
"env_cfg_entry_point": ShadowHandOverEnvCfg,
"rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_ppo_cfg.yaml",
"rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:ShadowHandPPORunnerCfg",
"skrl_cfg_entry_point": f"{agents.__name__}:skrl_ppo_cfg.yaml",
"skrl_ippo_cfg_entry_point": f"{agents.__name__}:skrl_ippo_cfg.yaml",
"skrl_mappo_cfg_entry_point": f"{agents.__name__}:skrl_mappo_cfg.yaml",
},
)
# Copyright (c) 2022-2024, The Isaac Lab Project Developers.
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
params:
seed: 42
# environment wrapper clipping
env:
# added to the wrapper
clip_observations: 5.0
# can make custom wrapper?
clip_actions: 1.0
algo:
name: a2c_continuous
model:
name: continuous_a2c_logstd
# doesn't have this fine grained control but made it close
network:
name: actor_critic
separate: False
space:
continuous:
mu_activation: None
sigma_activation: None
mu_init:
name: default
sigma_init:
name: const_initializer
val: 0
fixed_sigma: True
mlp:
units: [512, 512, 256, 128]
activation: elu
d2rl: False
initializer:
name: default
regularizer:
name: None
load_checkpoint: False # flag which sets whether to load the checkpoint
load_path: '' # path to the checkpoint to load
config:
name: shadow_hand_over
env_name: rlgpu
device: 'cuda:0'
device_name: 'cuda:0'
multi_gpu: False
ppo: True
mixed_precision: False
normalize_input: True
normalize_value: True
value_bootstrap: True
num_actors: -1 # configured from the script (based on num_envs)
reward_shaper:
scale_value: 0.01
normalize_advantage: True
gamma: 0.99
tau : 0.95
learning_rate: 5e-4
lr_schedule: adaptive
schedule_type: standard
kl_threshold: 0.016
score_to_win: 100000
max_epochs: 5000
save_best_after: 100
save_frequency: 200
print_stats: True
grad_norm: 1.0
entropy_coef: 0.0
truncate_grads: True
e_clip: 0.2
horizon_length: 16
minibatch_size: 32768
mini_epochs: 5
critic_coef: 4
clip_value: True
seq_length: 4
bounds_loss_coef: 0.0001
player:
deterministic: True
games_num: 100000
print_stats: True
# Copyright (c) 2022-2024, The Isaac Lab Project Developers.
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
from omni.isaac.lab.utils import configclass
from omni.isaac.lab_tasks.utils.wrappers.rsl_rl import (
RslRlOnPolicyRunnerCfg,
RslRlPpoActorCriticCfg,
RslRlPpoAlgorithmCfg,
)
@configclass
class ShadowHandPPORunnerCfg(RslRlOnPolicyRunnerCfg):
num_steps_per_env = 16
max_iterations = 10000
save_interval = 250
experiment_name = "shadow_hand_over"
empirical_normalization = True
policy = RslRlPpoActorCriticCfg(
init_noise_std=1.0,
actor_hidden_dims=[512, 512, 256, 128],
critic_hidden_dims=[512, 512, 256, 128],
activation="elu",
)
algorithm = RslRlPpoAlgorithmCfg(
value_loss_coef=1.0,
use_clipped_value_loss=True,
clip_param=0.2,
entropy_coef=0.005,
num_learning_epochs=5,
num_mini_batches=4,
learning_rate=5.0e-4,
schedule="adaptive",
gamma=0.99,
lam=0.95,
desired_kl=0.016,
max_grad_norm=1.0,
)
seed: 42
# Models are instantiated using skrl's model instantiator utility
# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html
models:
separate: False
policy: # see gaussian_model parameters
class: "GaussianMixin"
clip_actions: False
clip_log_std: True
initial_log_std: 0
min_log_std: -20.0
max_log_std: 2.0
input_shape: "Shape.STATES"
hiddens: [512, 256, 128]
hidden_activation: ["elu", "elu", "elu"]
output_shape: "Shape.ACTIONS"
output_activation: ""
output_scale: 1.0
value: # see deterministic_model parameters
class: "DeterministicMixin"
clip_actions: False
input_shape: "Shape.STATES"
hiddens: [512, 256, 128]
hidden_activation: ["elu", "elu", "elu"]
output_shape: "Shape.ONE"
output_activation: ""
output_scale: 1.0
# Memory
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
memory:
class: "RandomMemory"
memory_size: -1 # automatically determined (same as agent:rollouts)
# IPPO agent configuration (field names are from IPPO_DEFAULT_CONFIG)
# https://skrl.readthedocs.io/en/latest/api/multi_agents/ippo.html
agent:
class: "IPPO"
rollouts: 16
learning_epochs: 5
mini_batches: 4
discount_factor: 0.99
lambda: 0.95
learning_rate: 5.0e-4
learning_rate_scheduler: "KLAdaptiveLR"
learning_rate_scheduler_kwargs:
kl_threshold: 0.016
state_preprocessor: "RunningStandardScaler"
state_preprocessor_kwargs: null
value_preprocessor: "RunningStandardScaler"
value_preprocessor_kwargs: null
random_timesteps: 0
learning_starts: 0
grad_norm_clip: 1.0
ratio_clip: 0.2
value_clip: 0.2
clip_predicted_values: True
entropy_loss_scale: 0.0
value_loss_scale: 2.0
kl_threshold: 0
time_limit_bootstrap: True
rewards_shaper_scale: 1.0
# logging and checkpoint
experiment:
directory: "shadow_hand_over"
experiment_name: ""
write_interval: "auto"
checkpoint_interval: "auto"
# Sequential trainer
# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html
trainer:
class: "SequentialTrainer"
timesteps: 36000
environment_info: "log"
seed: 42
# Models are instantiated using skrl's model instantiator utility
# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html
models:
separate: True
policy: # see gaussian_model parameters
class: "GaussianMixin"
clip_actions: False
clip_log_std: True
initial_log_std: 0
min_log_std: -20.0
max_log_std: 2.0
input_shape: "Shape.STATES"
hiddens: [512, 512, 256, 128]
hidden_activation: ["elu", "elu", "elu", "elu"]
output_shape: "Shape.ACTIONS"
output_activation: ""
output_scale: 1.0
value: # see deterministic_model parameters
class: "DeterministicMixin"
clip_actions: False
input_shape: "Shape.STATES"
hiddens: [512, 512, 256, 128]
hidden_activation: ["elu", "elu", "elu", "elu"]
output_shape: "Shape.ONE"
output_activation: ""
output_scale: 1.0
# Memory
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
memory:
class: "RandomMemory"
memory_size: -1 # automatically determined (same as agent:rollouts)
# MAPPO agent configuration (field names are from MAPPO_DEFAULT_CONFIG)
# https://skrl.readthedocs.io/en/latest/api/multi_agents/mappo.html
agent:
class: "MAPPO"
rollouts: 16
learning_epochs: 5
mini_batches: 4
discount_factor: 0.99
lambda: 0.95
learning_rate: 5.0e-4
learning_rate_scheduler: "KLAdaptiveLR"
learning_rate_scheduler_kwargs:
kl_threshold: 0.016
state_preprocessor: "RunningStandardScaler"
state_preprocessor_kwargs: null
shared_state_preprocessor: "RunningStandardScaler"
shared_state_preprocessor_kwargs: null
value_preprocessor: "RunningStandardScaler"
value_preprocessor_kwargs: null
random_timesteps: 0
learning_starts: 0
grad_norm_clip: 1.0
ratio_clip: 0.2
value_clip: 0.2
clip_predicted_values: True
entropy_loss_scale: 0.0
value_loss_scale: 2.0
kl_threshold: 0
time_limit_bootstrap: True
rewards_shaper_scale: 1.0
# logging and checkpoint
experiment:
directory: "shadow_hand_over"
experiment_name: ""
write_interval: "auto"
checkpoint_interval: "auto"
# Sequential trainer
# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html
trainer:
class: "SequentialTrainer"
timesteps: 36000
environment_info: "log"
seed: 42
# Models are instantiated using skrl's model instantiator utility
# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html
models:
separate: False
policy: # see gaussian_model parameters
class: "GaussianMixin"
clip_actions: False
clip_log_std: True
initial_log_std: 0
min_log_std: -20.0
max_log_std: 2.0
input_shape: "Shape.STATES"
hiddens: [512, 512, 256, 128]
hidden_activation: ["elu", "elu", "elu", "elu"]
output_shape: "Shape.ACTIONS"
output_activation: ""
output_scale: 1.0
value: # see deterministic_model parameters
class: "DeterministicMixin"
clip_actions: False
input_shape: "Shape.STATES"
hiddens: [512, 512, 256, 128]
hidden_activation: ["elu", "elu", "elu", "elu"]
output_shape: "Shape.ONE"
output_activation: ""
output_scale: 1.0
# Memory
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
memory:
class: "RandomMemory"
memory_size: -1 # automatically determined (same as agent:rollouts)
# PPO agent configuration (field names are from PPO_DEFAULT_CONFIG)
# https://skrl.readthedocs.io/en/latest/api/agents/ppo.html
agent:
class: "PPO"
rollouts: 16
learning_epochs: 5
mini_batches: 4
discount_factor: 0.99
lambda: 0.95
learning_rate: 5.0e-4
learning_rate_scheduler: "KLAdaptiveLR"
learning_rate_scheduler_kwargs:
kl_threshold: 0.016
state_preprocessor: "RunningStandardScaler"
state_preprocessor_kwargs: null
value_preprocessor: "RunningStandardScaler"
value_preprocessor_kwargs: null
random_timesteps: 0
learning_starts: 0
grad_norm_clip: 1.0
ratio_clip: 0.2
value_clip: 0.2
clip_predicted_values: True
entropy_loss_scale: 0.0
value_loss_scale: 2.0
kl_threshold: 0
time_limit_bootstrap: True
rewards_shaper_scale: 1.0
# logging and checkpoint
experiment:
directory: "shadow_hand_over"
experiment_name: ""
write_interval: "auto"
checkpoint_interval: "auto"
# Sequential trainer
# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html
trainer:
class: "SequentialTrainer"
timesteps: 36000
environment_info: "log"
# Copyright (c) 2022-2024, The Isaac Lab Project Developers.
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
from omni.isaac.lab_assets.shadow_hand import SHADOW_HAND_CFG
import omni.isaac.lab.envs.mdp as mdp
import omni.isaac.lab.sim as sim_utils
from omni.isaac.lab.assets import ArticulationCfg, RigidObjectCfg
from omni.isaac.lab.envs import DirectMARLEnvCfg
from omni.isaac.lab.managers import EventTermCfg as EventTerm
from omni.isaac.lab.managers import SceneEntityCfg
from omni.isaac.lab.markers import VisualizationMarkersCfg
from omni.isaac.lab.scene import InteractiveSceneCfg
from omni.isaac.lab.sim import PhysxCfg, SimulationCfg
from omni.isaac.lab.sim.spawners.materials.physics_materials_cfg import RigidBodyMaterialCfg
from omni.isaac.lab.utils import configclass
@configclass
class EventCfg:
"""Configuration for randomization."""
# -- robot
robot_physics_material = EventTerm(
func=mdp.randomize_rigid_body_material,
mode="reset",
min_step_count_between_reset=720,
params={
"asset_cfg": SceneEntityCfg("right_hand"),
"static_friction_range": (0.7, 1.3),
"dynamic_friction_range": (1.0, 1.0),
"restitution_range": (1.0, 1.0),
"num_buckets": 250,
},
)
robot_joint_stiffness_and_damping = EventTerm(
func=mdp.randomize_actuator_gains,
min_step_count_between_reset=720,
mode="reset",
params={
"asset_cfg": SceneEntityCfg("right_hand", joint_names=".*"),
"stiffness_distribution_params": (0.75, 1.5),
"damping_distribution_params": (0.3, 3.0),
"operation": "scale",
"distribution": "log_uniform",
},
)
robot_joint_limits = EventTerm(
func=mdp.randomize_joint_parameters,
min_step_count_between_reset=720,
mode="reset",
params={
"asset_cfg": SceneEntityCfg("right_hand", joint_names=".*"),
"lower_limit_distribution_params": (0.00, 0.01),
"upper_limit_distribution_params": (0.00, 0.01),
"operation": "add",
"distribution": "gaussian",
},
)
robot_tendon_properties = EventTerm(
func=mdp.randomize_fixed_tendon_parameters,
min_step_count_between_reset=720,
mode="reset",
params={
"asset_cfg": SceneEntityCfg("right_hand", fixed_tendon_names=".*"),
"stiffness_distribution_params": (0.75, 1.5),
"damping_distribution_params": (0.3, 3.0),
"operation": "scale",
"distribution": "log_uniform",
},
)
# -- object
object_physics_material = EventTerm(
func=mdp.randomize_rigid_body_material,
min_step_count_between_reset=720,
mode="reset",
params={
"asset_cfg": SceneEntityCfg("object"),
"static_friction_range": (0.7, 1.3),
"dynamic_friction_range": (1.0, 1.0),
"restitution_range": (1.0, 1.0),
"num_buckets": 250,
},
)
object_scale_mass = EventTerm(
func=mdp.randomize_rigid_body_mass,
min_step_count_between_reset=720,
mode="reset",
params={
"asset_cfg": SceneEntityCfg("object"),
"mass_distribution_params": (0.5, 1.5),
"operation": "scale",
"distribution": "uniform",
},
)
# -- scene
reset_gravity = EventTerm(
func=mdp.randomize_physics_scene_gravity,
mode="interval",
is_global_time=True,
interval_range_s=(36.0, 36.0), # time_s = num_steps * (decimation * dt)
params={
"gravity_distribution_params": ([0.0, 0.0, 0.0], [0.0, 0.0, 0.4]),
"operation": "add",
"distribution": "gaussian",
},
)
@configclass
class ShadowHandOverEnvCfg(DirectMARLEnvCfg):
# env
decimation = 2
episode_length_s = 7.5
possible_agents = ["right_hand", "left_hand"]
num_actions = {"right_hand": 20, "left_hand": 20}
num_observations = {"right_hand": 157, "left_hand": 157}
num_states = 290
# simulation
sim: SimulationCfg = SimulationCfg(
dt=1 / 120,
render_interval=decimation,
physics_material=RigidBodyMaterialCfg(
static_friction=1.0,
dynamic_friction=1.0,
),
physx=PhysxCfg(
bounce_threshold_velocity=0.2,
),
)
# robot
right_robot_cfg: ArticulationCfg = SHADOW_HAND_CFG.replace(prim_path="/World/envs/env_.*/RightRobot").replace(
init_state=ArticulationCfg.InitialStateCfg(
pos=(0.0, 0.0, 0.5),
rot=(1.0, 0.0, 0.0, 0.0),
joint_pos={".*": 0.0},
)
)
left_robot_cfg: ArticulationCfg = SHADOW_HAND_CFG.replace(prim_path="/World/envs/env_.*/LeftRobot").replace(
init_state=ArticulationCfg.InitialStateCfg(
pos=(0.0, -1.0, 0.5),
rot=(0.0, 0.0, 0.0, 1.0),
joint_pos={".*": 0.0},
)
)
actuated_joint_names = [
"robot0_WRJ1",
"robot0_WRJ0",
"robot0_FFJ3",
"robot0_FFJ2",
"robot0_FFJ1",
"robot0_MFJ3",
"robot0_MFJ2",
"robot0_MFJ1",
"robot0_RFJ3",
"robot0_RFJ2",
"robot0_RFJ1",
"robot0_LFJ4",
"robot0_LFJ3",
"robot0_LFJ2",
"robot0_LFJ1",
"robot0_THJ4",
"robot0_THJ3",
"robot0_THJ2",
"robot0_THJ1",
"robot0_THJ0",
]
fingertip_body_names = [
"robot0_ffdistal",
"robot0_mfdistal",
"robot0_rfdistal",
"robot0_lfdistal",
"robot0_thdistal",
]
# in-hand object
object_cfg: RigidObjectCfg = RigidObjectCfg(
prim_path="/World/envs/env_.*/object",
spawn=sim_utils.SphereCfg(
radius=0.0335,
visual_material=sim_utils.PreviewSurfaceCfg(diffuse_color=(0.8, 1.0, 0.0)),
physics_material=sim_utils.RigidBodyMaterialCfg(static_friction=0.7),
rigid_props=sim_utils.RigidBodyPropertiesCfg(
kinematic_enabled=False,
disable_gravity=False,
enable_gyroscopic_forces=True,
solver_position_iteration_count=8,
solver_velocity_iteration_count=0,
sleep_threshold=0.005,
stabilization_threshold=0.0025,
max_depenetration_velocity=1000.0,
),
collision_props=sim_utils.CollisionPropertiesCfg(),
mass_props=sim_utils.MassPropertiesCfg(density=500.0),
),
init_state=RigidObjectCfg.InitialStateCfg(pos=(0.0, -0.39, 0.54), rot=(1.0, 0.0, 0.0, 0.0)),
)
# goal object
goal_object_cfg: VisualizationMarkersCfg = VisualizationMarkersCfg(
prim_path="/Visuals/goal_marker",
markers={
"goal": sim_utils.SphereCfg(
radius=0.0335,
visual_material=sim_utils.PreviewSurfaceCfg(diffuse_color=(0.4, 0.3, 1.0)),
),
},
)
# scene
scene: InteractiveSceneCfg = InteractiveSceneCfg(num_envs=2048, env_spacing=1.5, replicate_physics=True)
# reset
reset_position_noise = 0.01 # range of position at reset
reset_dof_pos_noise = 0.2 # range of dof pos at reset
reset_dof_vel_noise = 0.0 # range of dof vel at reset
# scales and constants
fall_dist = 0.24
vel_obs_scale = 0.2
act_moving_average = 1.0
# reward-related scales
dist_reward_scale = 20.0
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment