Commit e5493b74 authored by Toni-SM's avatar Toni-SM Committed by David Hoeller

Add the direct workflow shadow hand over multi-agent environment (#105)

# Description

This PR adds the shadow hand over multi-agent direct-workflow task
(`Isaac-Shadow-Hand-Over-Direct-v0`)

## Type of change

<!-- As you go through the list, delete the ones that are not
applicable. -->

- New feature (non-breaking change which adds functionality)
- This change requires a documentation update
## Checklist

- [x] I have run the [`pre-commit` checks](https://pre-commit.com/) with
`./isaaclab.sh --format`
- [x] I have made corresponding changes to the documentation
- [x] My changes generate no new warnings
- [x] I have added tests that prove my fix is effective or that my
feature works
- [x] I have updated the changelog and the corresponding version in the
extension's `config/extension.toml` file
- [x] I have added my name to the `CONTRIBUTORS.md` or my name already
exists there

<!--
As you go through the checklist above, you can mark something as done by
putting an x character in it

For example,
- [x] I have done this task
- [ ] I have not done this task
-->

---------
Signed-off-by: 's avatarToni-SM <toni.semu@gmail.com>
Co-authored-by: 's avatarKelly Guo <kellyg@nvidia.com>
Co-authored-by: 's avatarAlexander <143108850+nv-apoddubny@users.noreply.github.com>
Co-authored-by: 's avatarAlexander Poddubny <apoddubny@nvidia.com>
Co-authored-by: 's avatarKelly Guo <kellyguo123@hotmail.com>
parent e851048f
...@@ -259,7 +259,25 @@ Classic ...@@ -259,7 +259,25 @@ Classic
.. |cart-double-pendulum-direct-link| replace:: `Isaac-Cart-Double-Pendulum-Direct-v0 <https://github.com/isaac-sim/IsaacLab/blob/main/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/cart_double_pendulum/cart_double_pendulum_env.py>`__ .. |cart-double-pendulum-direct-link| replace:: `Isaac-Cart-Double-Pendulum-Direct-v0 <https://github.com/isaac-sim/IsaacLab/blob/main/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/cart_double_pendulum/cart_double_pendulum_env.py>`__
Manipulation
~~~~~~~~~~~~
Environments based on fixed-arm manipulation tasks.
.. table::
:widths: 33 37 30
+----------------------+--------------------------------+--------------------------------------------------------+
| World | Environment ID | Description |
+======================+================================+========================================================+
| |shadow-hand-over| | |shadow-hand-over-direct-link| | Passing an object from one hand over to the other hand |
+----------------------+--------------------------------+--------------------------------------------------------+
.. |shadow-hand-over| image:: ../_static/tasks/manipulation/shadow_hand_over.jpg
.. |shadow-hand-over-direct-link| replace:: `Isaac-Shadow-Hand-Over-Direct-v0 <https://github.com/isaac-sim/IsaacLab/blob/main/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/shadow_hand_over/shadow_hand_over_env.py>`__
|
Comprehensive List of Environments Comprehensive List of Environments
================================== ==================================
...@@ -298,6 +316,8 @@ Comprehensive List of Environments ...@@ -298,6 +316,8 @@ Comprehensive List of Environments
+------------------------------------------------+--------------------------------------------+---------------+-----------------------------+ +------------------------------------------------+--------------------------------------------+---------------+-----------------------------+
| Isaac-Quadcopter-Direct-v0 | | Direct | rsl_rl, rl_games, skrl | | Isaac-Quadcopter-Direct-v0 | | Direct | rsl_rl, rl_games, skrl |
+------------------------------------------------+--------------------------------------------+---------------+-----------------------------+ +------------------------------------------------+--------------------------------------------+---------------+-----------------------------+
| Isaac-Shadow-Hand-Over-Direct-v0 | | Direct | rsl_rl, rl_games, skrl |
+------------------------------------------------+--------------------------------------------+---------------+-----------------------------+
| Isaac-Humanoid-v0 | | Manager Based | rsl_rl, rl_games, skrl, sb3 | | Isaac-Humanoid-v0 | | Manager Based | rsl_rl, rl_games, skrl, sb3 |
+------------------------------------------------+--------------------------------------------+---------------+-----------------------------+ +------------------------------------------------+--------------------------------------------+---------------+-----------------------------+
| Isaac-Ant-v0 | | Manager Based | rsl_rl, rl_games, skrl, sb3 | | Isaac-Ant-v0 | | Manager Based | rsl_rl, rl_games, skrl, sb3 |
......
[package] [package]
# Note: Semantic Versioning is used: https://semver.org/ # Note: Semantic Versioning is used: https://semver.org/
version = "0.10.1" version = "0.10.2"
# Description # Description
title = "Isaac Lab Environments" title = "Isaac Lab Environments"
......
Changelog Changelog
--------- ---------
0.10.2 (2024-08-23)
~~~~~~~~~~~~~~~~~~~
Added
^^^^^
* Added ``Isaac-Shadow-Hand-Over-Direct-v0`` multi-agent environment
0.10.1 (2024-08-21) 0.10.1 (2024-08-21)
~~~~~~~~~~~~~~~~~~~ ~~~~~~~~~~~~~~~~~~~
......
# Copyright (c) 2022-2024, The Isaac Lab Project Developers.
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
"""
ShadowHand Over environment.
"""
import gymnasium as gym
from . import agents
from .shadow_hand_over_env import ShadowHandOverEnv
from .shadow_hand_over_env_cfg import ShadowHandOverEnvCfg
##
# Register Gym environments.
##
gym.register(
id="Isaac-Shadow-Hand-Over-Direct-v0",
entry_point="omni.isaac.lab_tasks.direct.shadow_hand_over:ShadowHandOverEnv",
disable_env_checker=True,
kwargs={
"env_cfg_entry_point": ShadowHandOverEnvCfg,
"rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_ppo_cfg.yaml",
"rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:ShadowHandPPORunnerCfg",
"skrl_cfg_entry_point": f"{agents.__name__}:skrl_ppo_cfg.yaml",
"skrl_ippo_cfg_entry_point": f"{agents.__name__}:skrl_ippo_cfg.yaml",
"skrl_mappo_cfg_entry_point": f"{agents.__name__}:skrl_mappo_cfg.yaml",
},
)
# Copyright (c) 2022-2024, The Isaac Lab Project Developers.
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
params:
seed: 42
# environment wrapper clipping
env:
# added to the wrapper
clip_observations: 5.0
# can make custom wrapper?
clip_actions: 1.0
algo:
name: a2c_continuous
model:
name: continuous_a2c_logstd
# doesn't have this fine grained control but made it close
network:
name: actor_critic
separate: False
space:
continuous:
mu_activation: None
sigma_activation: None
mu_init:
name: default
sigma_init:
name: const_initializer
val: 0
fixed_sigma: True
mlp:
units: [512, 512, 256, 128]
activation: elu
d2rl: False
initializer:
name: default
regularizer:
name: None
load_checkpoint: False # flag which sets whether to load the checkpoint
load_path: '' # path to the checkpoint to load
config:
name: shadow_hand_over
env_name: rlgpu
device: 'cuda:0'
device_name: 'cuda:0'
multi_gpu: False
ppo: True
mixed_precision: False
normalize_input: True
normalize_value: True
value_bootstrap: True
num_actors: -1 # configured from the script (based on num_envs)
reward_shaper:
scale_value: 0.01
normalize_advantage: True
gamma: 0.99
tau : 0.95
learning_rate: 5e-4
lr_schedule: adaptive
schedule_type: standard
kl_threshold: 0.016
score_to_win: 100000
max_epochs: 5000
save_best_after: 100
save_frequency: 200
print_stats: True
grad_norm: 1.0
entropy_coef: 0.0
truncate_grads: True
e_clip: 0.2
horizon_length: 16
minibatch_size: 32768
mini_epochs: 5
critic_coef: 4
clip_value: True
seq_length: 4
bounds_loss_coef: 0.0001
player:
deterministic: True
games_num: 100000
print_stats: True
# Copyright (c) 2022-2024, The Isaac Lab Project Developers.
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
from omni.isaac.lab.utils import configclass
from omni.isaac.lab_tasks.utils.wrappers.rsl_rl import (
RslRlOnPolicyRunnerCfg,
RslRlPpoActorCriticCfg,
RslRlPpoAlgorithmCfg,
)
@configclass
class ShadowHandPPORunnerCfg(RslRlOnPolicyRunnerCfg):
num_steps_per_env = 16
max_iterations = 10000
save_interval = 250
experiment_name = "shadow_hand_over"
empirical_normalization = True
policy = RslRlPpoActorCriticCfg(
init_noise_std=1.0,
actor_hidden_dims=[512, 512, 256, 128],
critic_hidden_dims=[512, 512, 256, 128],
activation="elu",
)
algorithm = RslRlPpoAlgorithmCfg(
value_loss_coef=1.0,
use_clipped_value_loss=True,
clip_param=0.2,
entropy_coef=0.005,
num_learning_epochs=5,
num_mini_batches=4,
learning_rate=5.0e-4,
schedule="adaptive",
gamma=0.99,
lam=0.95,
desired_kl=0.016,
max_grad_norm=1.0,
)
seed: 42
# Models are instantiated using skrl's model instantiator utility
# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html
models:
separate: False
policy: # see gaussian_model parameters
class: "GaussianMixin"
clip_actions: False
clip_log_std: True
initial_log_std: 0
min_log_std: -20.0
max_log_std: 2.0
input_shape: "Shape.STATES"
hiddens: [512, 256, 128]
hidden_activation: ["elu", "elu", "elu"]
output_shape: "Shape.ACTIONS"
output_activation: ""
output_scale: 1.0
value: # see deterministic_model parameters
class: "DeterministicMixin"
clip_actions: False
input_shape: "Shape.STATES"
hiddens: [512, 256, 128]
hidden_activation: ["elu", "elu", "elu"]
output_shape: "Shape.ONE"
output_activation: ""
output_scale: 1.0
# Memory
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
memory:
class: "RandomMemory"
memory_size: -1 # automatically determined (same as agent:rollouts)
# IPPO agent configuration (field names are from IPPO_DEFAULT_CONFIG)
# https://skrl.readthedocs.io/en/latest/api/multi_agents/ippo.html
agent:
class: "IPPO"
rollouts: 16
learning_epochs: 5
mini_batches: 4
discount_factor: 0.99
lambda: 0.95
learning_rate: 5.0e-4
learning_rate_scheduler: "KLAdaptiveLR"
learning_rate_scheduler_kwargs:
kl_threshold: 0.016
state_preprocessor: "RunningStandardScaler"
state_preprocessor_kwargs: null
value_preprocessor: "RunningStandardScaler"
value_preprocessor_kwargs: null
random_timesteps: 0
learning_starts: 0
grad_norm_clip: 1.0
ratio_clip: 0.2
value_clip: 0.2
clip_predicted_values: True
entropy_loss_scale: 0.0
value_loss_scale: 2.0
kl_threshold: 0
time_limit_bootstrap: True
rewards_shaper_scale: 1.0
# logging and checkpoint
experiment:
directory: "shadow_hand_over"
experiment_name: ""
write_interval: "auto"
checkpoint_interval: "auto"
# Sequential trainer
# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html
trainer:
class: "SequentialTrainer"
timesteps: 36000
environment_info: "log"
seed: 42
# Models are instantiated using skrl's model instantiator utility
# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html
models:
separate: True
policy: # see gaussian_model parameters
class: "GaussianMixin"
clip_actions: False
clip_log_std: True
initial_log_std: 0
min_log_std: -20.0
max_log_std: 2.0
input_shape: "Shape.STATES"
hiddens: [512, 512, 256, 128]
hidden_activation: ["elu", "elu", "elu", "elu"]
output_shape: "Shape.ACTIONS"
output_activation: ""
output_scale: 1.0
value: # see deterministic_model parameters
class: "DeterministicMixin"
clip_actions: False
input_shape: "Shape.STATES"
hiddens: [512, 512, 256, 128]
hidden_activation: ["elu", "elu", "elu", "elu"]
output_shape: "Shape.ONE"
output_activation: ""
output_scale: 1.0
# Memory
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
memory:
class: "RandomMemory"
memory_size: -1 # automatically determined (same as agent:rollouts)
# MAPPO agent configuration (field names are from MAPPO_DEFAULT_CONFIG)
# https://skrl.readthedocs.io/en/latest/api/multi_agents/mappo.html
agent:
class: "MAPPO"
rollouts: 16
learning_epochs: 5
mini_batches: 4
discount_factor: 0.99
lambda: 0.95
learning_rate: 5.0e-4
learning_rate_scheduler: "KLAdaptiveLR"
learning_rate_scheduler_kwargs:
kl_threshold: 0.016
state_preprocessor: "RunningStandardScaler"
state_preprocessor_kwargs: null
shared_state_preprocessor: "RunningStandardScaler"
shared_state_preprocessor_kwargs: null
value_preprocessor: "RunningStandardScaler"
value_preprocessor_kwargs: null
random_timesteps: 0
learning_starts: 0
grad_norm_clip: 1.0
ratio_clip: 0.2
value_clip: 0.2
clip_predicted_values: True
entropy_loss_scale: 0.0
value_loss_scale: 2.0
kl_threshold: 0
time_limit_bootstrap: True
rewards_shaper_scale: 1.0
# logging and checkpoint
experiment:
directory: "shadow_hand_over"
experiment_name: ""
write_interval: "auto"
checkpoint_interval: "auto"
# Sequential trainer
# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html
trainer:
class: "SequentialTrainer"
timesteps: 36000
environment_info: "log"
seed: 42
# Models are instantiated using skrl's model instantiator utility
# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html
models:
separate: False
policy: # see gaussian_model parameters
class: "GaussianMixin"
clip_actions: False
clip_log_std: True
initial_log_std: 0
min_log_std: -20.0
max_log_std: 2.0
input_shape: "Shape.STATES"
hiddens: [512, 512, 256, 128]
hidden_activation: ["elu", "elu", "elu", "elu"]
output_shape: "Shape.ACTIONS"
output_activation: ""
output_scale: 1.0
value: # see deterministic_model parameters
class: "DeterministicMixin"
clip_actions: False
input_shape: "Shape.STATES"
hiddens: [512, 512, 256, 128]
hidden_activation: ["elu", "elu", "elu", "elu"]
output_shape: "Shape.ONE"
output_activation: ""
output_scale: 1.0
# Memory
# https://skrl.readthedocs.io/en/latest/api/memories/random.html
memory:
class: "RandomMemory"
memory_size: -1 # automatically determined (same as agent:rollouts)
# PPO agent configuration (field names are from PPO_DEFAULT_CONFIG)
# https://skrl.readthedocs.io/en/latest/api/agents/ppo.html
agent:
class: "PPO"
rollouts: 16
learning_epochs: 5
mini_batches: 4
discount_factor: 0.99
lambda: 0.95
learning_rate: 5.0e-4
learning_rate_scheduler: "KLAdaptiveLR"
learning_rate_scheduler_kwargs:
kl_threshold: 0.016
state_preprocessor: "RunningStandardScaler"
state_preprocessor_kwargs: null
value_preprocessor: "RunningStandardScaler"
value_preprocessor_kwargs: null
random_timesteps: 0
learning_starts: 0
grad_norm_clip: 1.0
ratio_clip: 0.2
value_clip: 0.2
clip_predicted_values: True
entropy_loss_scale: 0.0
value_loss_scale: 2.0
kl_threshold: 0
time_limit_bootstrap: True
rewards_shaper_scale: 1.0
# logging and checkpoint
experiment:
directory: "shadow_hand_over"
experiment_name: ""
write_interval: "auto"
checkpoint_interval: "auto"
# Sequential trainer
# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html
trainer:
class: "SequentialTrainer"
timesteps: 36000
environment_info: "log"
# Copyright (c) 2022-2024, The Isaac Lab Project Developers.
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
from __future__ import annotations
import numpy as np
import torch
from collections.abc import Sequence
import omni.isaac.lab.sim as sim_utils
from omni.isaac.lab.assets import Articulation, RigidObject
from omni.isaac.lab.envs import DirectMARLEnv
from omni.isaac.lab.markers import VisualizationMarkers
from omni.isaac.lab.sim.spawners.from_files import GroundPlaneCfg, spawn_ground_plane
from omni.isaac.lab.utils.math import quat_conjugate, quat_from_angle_axis, quat_mul, sample_uniform, saturate
from .shadow_hand_over_env_cfg import ShadowHandOverEnvCfg
class ShadowHandOverEnv(DirectMARLEnv):
cfg: ShadowHandOverEnvCfg
def __init__(self, cfg: ShadowHandOverEnvCfg, render_mode: str | None = None, **kwargs):
super().__init__(cfg, render_mode, **kwargs)
self.num_hand_dofs = self.right_hand.num_joints
# buffers for position targets
self.right_hand_dof_targets = torch.zeros(
(self.num_envs, self.num_hand_dofs), dtype=torch.float, device=self.device
)
self.right_hand_prev_targets = torch.zeros(
(self.num_envs, self.num_hand_dofs), dtype=torch.float, device=self.device
)
self.right_hand_curr_targets = torch.zeros(
(self.num_envs, self.num_hand_dofs), dtype=torch.float, device=self.device
)
self.left_hand_dof_targets = torch.zeros(
(self.num_envs, self.num_hand_dofs), dtype=torch.float, device=self.device
)
self.left_hand_prev_targets = torch.zeros(
(self.num_envs, self.num_hand_dofs), dtype=torch.float, device=self.device
)
self.left_hand_curr_targets = torch.zeros(
(self.num_envs, self.num_hand_dofs), dtype=torch.float, device=self.device
)
# list of actuated joints
self.actuated_dof_indices = list()
for joint_name in cfg.actuated_joint_names:
self.actuated_dof_indices.append(self.right_hand.joint_names.index(joint_name))
self.actuated_dof_indices.sort()
# finger bodies
self.finger_bodies = list()
for body_name in self.cfg.fingertip_body_names:
self.finger_bodies.append(self.right_hand.body_names.index(body_name))
self.finger_bodies.sort()
self.num_fingertips = len(self.finger_bodies)
# joint limits
joint_pos_limits = self.right_hand.root_physx_view.get_dof_limits().to(self.device)
self.hand_dof_lower_limits = joint_pos_limits[..., 0]
self.hand_dof_upper_limits = joint_pos_limits[..., 1]
# used to compare object position
self.in_hand_pos = self.object.data.default_root_state[:, 0:3].clone()
self.in_hand_pos[:, 2] -= 0.04
# default goal positions
self.goal_rot = torch.zeros((self.num_envs, 4), dtype=torch.float, device=self.device)
self.goal_rot[:, 0] = 1.0
self.goal_pos = torch.zeros((self.num_envs, 3), dtype=torch.float, device=self.device)
self.goal_pos[:, :] = torch.tensor([0.0, -0.64, 0.54], device=self.device)
# initialize goal marker
self.goal_markers = VisualizationMarkers(self.cfg.goal_object_cfg)
# unit tensors
self.x_unit_tensor = torch.tensor([1, 0, 0], dtype=torch.float, device=self.device).repeat((self.num_envs, 1))
self.y_unit_tensor = torch.tensor([0, 1, 0], dtype=torch.float, device=self.device).repeat((self.num_envs, 1))
self.z_unit_tensor = torch.tensor([0, 0, 1], dtype=torch.float, device=self.device).repeat((self.num_envs, 1))
def _setup_scene(self):
# add hand, in-hand object, and goal object
self.right_hand = Articulation(self.cfg.right_robot_cfg)
self.left_hand = Articulation(self.cfg.left_robot_cfg)
self.object = RigidObject(self.cfg.object_cfg)
# add ground plane
spawn_ground_plane(prim_path="/World/ground", cfg=GroundPlaneCfg())
# clone and replicate (no need to filter for this environment)
self.scene.clone_environments(copy_from_source=False)
# add articulation to scene - we must register to scene to randomize with EventManager
self.scene.articulations["right_robot"] = self.right_hand
self.scene.articulations["left_robot"] = self.left_hand
self.scene.rigid_objects["object"] = self.object
# add lights
light_cfg = sim_utils.DomeLightCfg(intensity=2000.0, color=(0.75, 0.75, 0.75))
light_cfg.func("/World/Light", light_cfg)
def _pre_physics_step(self, actions: dict[str, torch.Tensor]) -> None:
self.actions = actions
def _apply_action(self) -> None:
# right hand target
self.right_hand_curr_targets[:, self.actuated_dof_indices] = scale(
self.actions["right_hand"],
self.hand_dof_lower_limits[:, self.actuated_dof_indices],
self.hand_dof_upper_limits[:, self.actuated_dof_indices],
)
self.right_hand_curr_targets[:, self.actuated_dof_indices] = (
self.cfg.act_moving_average * self.right_hand_curr_targets[:, self.actuated_dof_indices]
+ (1.0 - self.cfg.act_moving_average) * self.right_hand_prev_targets[:, self.actuated_dof_indices]
)
self.right_hand_curr_targets[:, self.actuated_dof_indices] = saturate(
self.right_hand_curr_targets[:, self.actuated_dof_indices],
self.hand_dof_lower_limits[:, self.actuated_dof_indices],
self.hand_dof_upper_limits[:, self.actuated_dof_indices],
)
# left hand target
self.left_hand_curr_targets[:, self.actuated_dof_indices] = scale(
self.actions["left_hand"],
self.hand_dof_lower_limits[:, self.actuated_dof_indices],
self.hand_dof_upper_limits[:, self.actuated_dof_indices],
)
self.left_hand_curr_targets[:, self.actuated_dof_indices] = (
self.cfg.act_moving_average * self.left_hand_curr_targets[:, self.actuated_dof_indices]
+ (1.0 - self.cfg.act_moving_average) * self.left_hand_prev_targets[:, self.actuated_dof_indices]
)
self.left_hand_curr_targets[:, self.actuated_dof_indices] = saturate(
self.left_hand_curr_targets[:, self.actuated_dof_indices],
self.hand_dof_lower_limits[:, self.actuated_dof_indices],
self.hand_dof_upper_limits[:, self.actuated_dof_indices],
)
# save current targets
self.right_hand_prev_targets[:, self.actuated_dof_indices] = self.right_hand_curr_targets[
:, self.actuated_dof_indices
]
self.left_hand_prev_targets[:, self.actuated_dof_indices] = self.left_hand_curr_targets[
:, self.actuated_dof_indices
]
# set targets
self.right_hand.set_joint_position_target(
self.right_hand_curr_targets[:, self.actuated_dof_indices], joint_ids=self.actuated_dof_indices
)
self.left_hand.set_joint_position_target(
self.left_hand_curr_targets[:, self.actuated_dof_indices], joint_ids=self.actuated_dof_indices
)
def _get_observations(self) -> dict[str, torch.Tensor]:
observations = {
"right_hand": torch.cat(
(
# ---- right hand ----
# DOF positions (24)
unscale(self.right_hand_dof_pos, self.hand_dof_lower_limits, self.hand_dof_upper_limits),
# DOF velocities (24)
self.cfg.vel_obs_scale * self.right_hand_dof_vel,
# fingertip positions (5 * 3)
self.right_fingertip_pos.view(self.num_envs, self.num_fingertips * 3),
# fingertip rotations (5 * 4)
self.right_fingertip_rot.view(self.num_envs, self.num_fingertips * 4),
# fingertip linear and angular velocities (5 * 6)
self.right_fingertip_velocities.view(self.num_envs, self.num_fingertips * 6),
# applied actions (20)
self.actions["right_hand"],
# ---- object ----
# positions (3)
self.object_pos,
# rotations (4)
self.object_rot,
# linear velocities (3)
self.object_linvel,
# angular velocities (3)
self.cfg.vel_obs_scale * self.object_angvel,
# ---- goal ----
# positions (3)
self.goal_pos,
# rotations (4)
self.goal_rot,
# goal-object rotation diff (4)
quat_mul(self.object_rot, quat_conjugate(self.goal_rot)),
),
dim=-1,
),
"left_hand": torch.cat(
(
# ---- left hand ----
# DOF positions (24)
unscale(self.left_hand_dof_pos, self.hand_dof_lower_limits, self.hand_dof_upper_limits),
# DOF velocities (24)
self.cfg.vel_obs_scale * self.left_hand_dof_vel,
# fingertip positions (5 * 3)
self.left_fingertip_pos.view(self.num_envs, self.num_fingertips * 3),
# fingertip rotations (5 * 4)
self.left_fingertip_rot.view(self.num_envs, self.num_fingertips * 4),
# fingertip linear and angular velocities (5 * 6)
self.left_fingertip_velocities.view(self.num_envs, self.num_fingertips * 6),
# applied actions (20)
self.actions["left_hand"],
# ---- object ----
# positions (3)
self.object_pos,
# rotations (4)
self.object_rot,
# linear velocities (3)
self.object_linvel,
# angular velocities (3)
self.cfg.vel_obs_scale * self.object_angvel,
# ---- goal ----
# positions (3)
self.goal_pos,
# rotations (4)
self.goal_rot,
# goal-object rotation diff (4)
quat_mul(self.object_rot, quat_conjugate(self.goal_rot)),
),
dim=-1,
),
}
return observations
def _get_states(self) -> torch.Tensor:
states = torch.cat(
(
# ---- right hand ----
# DOF positions (24)
unscale(self.right_hand_dof_pos, self.hand_dof_lower_limits, self.hand_dof_upper_limits),
# DOF velocities (24)
self.cfg.vel_obs_scale * self.right_hand_dof_vel,
# fingertip positions (5 * 3)
self.right_fingertip_pos.view(self.num_envs, self.num_fingertips * 3),
# fingertip rotations (5 * 4)
self.right_fingertip_rot.view(self.num_envs, self.num_fingertips * 4),
# fingertip linear and angular velocities (5 * 6)
self.right_fingertip_velocities.view(self.num_envs, self.num_fingertips * 6),
# applied actions (20)
self.actions["right_hand"],
# ---- left hand ----
# DOF positions (24)
unscale(self.left_hand_dof_pos, self.hand_dof_lower_limits, self.hand_dof_upper_limits),
# DOF velocities (24)
self.cfg.vel_obs_scale * self.left_hand_dof_vel,
# fingertip positions (5 * 3)
self.left_fingertip_pos.view(self.num_envs, self.num_fingertips * 3),
# fingertip rotations (5 * 4)
self.left_fingertip_rot.view(self.num_envs, self.num_fingertips * 4),
# fingertip linear and angular velocities (5 * 6)
self.left_fingertip_velocities.view(self.num_envs, self.num_fingertips * 6),
# applied actions (20)
self.actions["left_hand"],
# ---- object ----
# positions (3)
self.object_pos,
# rotations (4)
self.object_rot,
# linear velocities (3)
self.object_linvel,
# angular velocities (3)
self.cfg.vel_obs_scale * self.object_angvel,
# ---- goal ----
# positions (3)
self.goal_pos,
# rotations (4)
self.goal_rot,
# goal-object rotation diff (4)
quat_mul(self.object_rot, quat_conjugate(self.goal_rot)),
),
dim=-1,
)
return states
def _get_rewards(self) -> dict[str, torch.Tensor]:
# compute reward
goal_dist = torch.norm(self.object_pos - self.goal_pos, p=2, dim=-1)
rew_dist = 2 * torch.exp(-self.cfg.dist_reward_scale * goal_dist)
# log reward components
if "log" not in self.extras:
self.extras["log"] = dict()
self.extras["log"]["dist_reward"] = rew_dist.mean()
self.extras["log"]["dist_goal"] = goal_dist.mean()
return {"right_hand": rew_dist, "left_hand": rew_dist}
def _get_dones(self) -> tuple[dict[str, torch.Tensor], dict[str, torch.Tensor]]:
self._compute_intermediate_values()
# reset when object has fallen
out_of_reach = self.object_pos[:, 2] <= self.cfg.fall_dist
# reset when episode ends
time_out = self.episode_length_buf >= self.max_episode_length - 1
terminated = {agent: out_of_reach for agent in self.cfg.possible_agents}
time_outs = {agent: time_out for agent in self.cfg.possible_agents}
return terminated, time_outs
def _reset_idx(self, env_ids: Sequence[int] | torch.Tensor | None):
if env_ids is None:
env_ids = self.right_hand._ALL_INDICES
# reset articulation and rigid body attributes
super()._reset_idx(env_ids)
# reset goals
self._reset_target_pose(env_ids)
# reset object
object_default_state = self.object.data.default_root_state.clone()[env_ids]
pos_noise = sample_uniform(-1.0, 1.0, (len(env_ids), 3), device=self.device)
object_default_state[:, 0:3] = (
object_default_state[:, 0:3] + self.cfg.reset_position_noise * pos_noise + self.scene.env_origins[env_ids]
)
rot_noise = sample_uniform(-1.0, 1.0, (len(env_ids), 2), device=self.device) # noise for X and Y rotation
object_default_state[:, 3:7] = randomize_rotation(
rot_noise[:, 0], rot_noise[:, 1], self.x_unit_tensor[env_ids], self.y_unit_tensor[env_ids]
)
object_default_state[:, 7:] = torch.zeros_like(self.object.data.default_root_state[env_ids, 7:])
self.object.write_root_state_to_sim(object_default_state, env_ids)
# reset right hand
delta_max = self.hand_dof_upper_limits[env_ids] - self.right_hand.data.default_joint_pos[env_ids]
delta_min = self.hand_dof_lower_limits[env_ids] - self.right_hand.data.default_joint_pos[env_ids]
dof_pos_noise = sample_uniform(-1.0, 1.0, (len(env_ids), self.num_hand_dofs), device=self.device)
rand_delta = delta_min + (delta_max - delta_min) * 0.5 * dof_pos_noise
dof_pos = self.right_hand.data.default_joint_pos[env_ids] + self.cfg.reset_dof_pos_noise * rand_delta
dof_vel_noise = sample_uniform(-1.0, 1.0, (len(env_ids), self.num_hand_dofs), device=self.device)
dof_vel = self.right_hand.data.default_joint_vel[env_ids] + self.cfg.reset_dof_vel_noise * dof_vel_noise
self.right_hand_prev_targets[env_ids] = dof_pos
self.right_hand_curr_targets[env_ids] = dof_pos
self.right_hand_dof_targets[env_ids] = dof_pos
self.right_hand.set_joint_position_target(dof_pos, env_ids=env_ids)
self.right_hand.write_joint_state_to_sim(dof_pos, dof_vel, env_ids=env_ids)
# reset left hand
delta_max = self.hand_dof_upper_limits[env_ids] - self.left_hand.data.default_joint_pos[env_ids]
delta_min = self.hand_dof_lower_limits[env_ids] - self.left_hand.data.default_joint_pos[env_ids]
dof_pos_noise = sample_uniform(-1.0, 1.0, (len(env_ids), self.num_hand_dofs), device=self.device)
rand_delta = delta_min + (delta_max - delta_min) * 0.5 * dof_pos_noise
dof_pos = self.left_hand.data.default_joint_pos[env_ids] + self.cfg.reset_dof_pos_noise * rand_delta
dof_vel_noise = sample_uniform(-1.0, 1.0, (len(env_ids), self.num_hand_dofs), device=self.device)
dof_vel = self.left_hand.data.default_joint_vel[env_ids] + self.cfg.reset_dof_vel_noise * dof_vel_noise
self.left_hand_prev_targets[env_ids] = dof_pos
self.left_hand_curr_targets[env_ids] = dof_pos
self.left_hand_dof_targets[env_ids] = dof_pos
self.left_hand.set_joint_position_target(dof_pos, env_ids=env_ids)
self.left_hand.write_joint_state_to_sim(dof_pos, dof_vel, env_ids=env_ids)
self._compute_intermediate_values()
def _reset_target_pose(self, env_ids):
# reset goal rotation
rand_floats = sample_uniform(-1.0, 1.0, (len(env_ids), 2), device=self.device)
new_rot = randomize_rotation(
rand_floats[:, 0], rand_floats[:, 1], self.x_unit_tensor[env_ids], self.y_unit_tensor[env_ids]
)
# update goal pose and markers
self.goal_rot[env_ids] = new_rot
goal_pos = self.goal_pos + self.scene.env_origins
self.goal_markers.visualize(goal_pos, self.goal_rot)
def _compute_intermediate_values(self):
# data for right hand
self.right_fingertip_pos = self.right_hand.data.body_pos_w[:, self.finger_bodies]
self.right_fingertip_rot = self.right_hand.data.body_quat_w[:, self.finger_bodies]
self.right_fingertip_pos -= self.scene.env_origins.repeat((1, self.num_fingertips)).reshape(
self.num_envs, self.num_fingertips, 3
)
self.right_fingertip_velocities = self.right_hand.data.body_vel_w[:, self.finger_bodies]
self.right_hand_dof_pos = self.right_hand.data.joint_pos
self.right_hand_dof_vel = self.right_hand.data.joint_vel
# data for left hand
self.left_fingertip_pos = self.left_hand.data.body_pos_w[:, self.finger_bodies]
self.left_fingertip_rot = self.left_hand.data.body_quat_w[:, self.finger_bodies]
self.left_fingertip_pos -= self.scene.env_origins.repeat((1, self.num_fingertips)).reshape(
self.num_envs, self.num_fingertips, 3
)
self.left_fingertip_velocities = self.left_hand.data.body_vel_w[:, self.finger_bodies]
self.left_hand_dof_pos = self.left_hand.data.joint_pos
self.left_hand_dof_vel = self.left_hand.data.joint_vel
# data for object
self.object_pos = self.object.data.root_pos_w - self.scene.env_origins
self.object_rot = self.object.data.root_quat_w
self.object_velocities = self.object.data.root_vel_w
self.object_linvel = self.object.data.root_lin_vel_w
self.object_angvel = self.object.data.root_ang_vel_w
@torch.jit.script
def scale(x, lower, upper):
return 0.5 * (x + 1.0) * (upper - lower) + lower
@torch.jit.script
def unscale(x, lower, upper):
return (2.0 * x - upper - lower) / (upper - lower)
@torch.jit.script
def randomize_rotation(rand0, rand1, x_unit_tensor, y_unit_tensor):
return quat_mul(
quat_from_angle_axis(rand0 * np.pi, x_unit_tensor), quat_from_angle_axis(rand1 * np.pi, y_unit_tensor)
)
# Copyright (c) 2022-2024, The Isaac Lab Project Developers.
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
from omni.isaac.lab_assets.shadow_hand import SHADOW_HAND_CFG
import omni.isaac.lab.envs.mdp as mdp
import omni.isaac.lab.sim as sim_utils
from omni.isaac.lab.assets import ArticulationCfg, RigidObjectCfg
from omni.isaac.lab.envs import DirectMARLEnvCfg
from omni.isaac.lab.managers import EventTermCfg as EventTerm
from omni.isaac.lab.managers import SceneEntityCfg
from omni.isaac.lab.markers import VisualizationMarkersCfg
from omni.isaac.lab.scene import InteractiveSceneCfg
from omni.isaac.lab.sim import PhysxCfg, SimulationCfg
from omni.isaac.lab.sim.spawners.materials.physics_materials_cfg import RigidBodyMaterialCfg
from omni.isaac.lab.utils import configclass
@configclass
class EventCfg:
"""Configuration for randomization."""
# -- robot
robot_physics_material = EventTerm(
func=mdp.randomize_rigid_body_material,
mode="reset",
min_step_count_between_reset=720,
params={
"asset_cfg": SceneEntityCfg("right_hand"),
"static_friction_range": (0.7, 1.3),
"dynamic_friction_range": (1.0, 1.0),
"restitution_range": (1.0, 1.0),
"num_buckets": 250,
},
)
robot_joint_stiffness_and_damping = EventTerm(
func=mdp.randomize_actuator_gains,
min_step_count_between_reset=720,
mode="reset",
params={
"asset_cfg": SceneEntityCfg("right_hand", joint_names=".*"),
"stiffness_distribution_params": (0.75, 1.5),
"damping_distribution_params": (0.3, 3.0),
"operation": "scale",
"distribution": "log_uniform",
},
)
robot_joint_limits = EventTerm(
func=mdp.randomize_joint_parameters,
min_step_count_between_reset=720,
mode="reset",
params={
"asset_cfg": SceneEntityCfg("right_hand", joint_names=".*"),
"lower_limit_distribution_params": (0.00, 0.01),
"upper_limit_distribution_params": (0.00, 0.01),
"operation": "add",
"distribution": "gaussian",
},
)
robot_tendon_properties = EventTerm(
func=mdp.randomize_fixed_tendon_parameters,
min_step_count_between_reset=720,
mode="reset",
params={
"asset_cfg": SceneEntityCfg("right_hand", fixed_tendon_names=".*"),
"stiffness_distribution_params": (0.75, 1.5),
"damping_distribution_params": (0.3, 3.0),
"operation": "scale",
"distribution": "log_uniform",
},
)
# -- object
object_physics_material = EventTerm(
func=mdp.randomize_rigid_body_material,
min_step_count_between_reset=720,
mode="reset",
params={
"asset_cfg": SceneEntityCfg("object"),
"static_friction_range": (0.7, 1.3),
"dynamic_friction_range": (1.0, 1.0),
"restitution_range": (1.0, 1.0),
"num_buckets": 250,
},
)
object_scale_mass = EventTerm(
func=mdp.randomize_rigid_body_mass,
min_step_count_between_reset=720,
mode="reset",
params={
"asset_cfg": SceneEntityCfg("object"),
"mass_distribution_params": (0.5, 1.5),
"operation": "scale",
"distribution": "uniform",
},
)
# -- scene
reset_gravity = EventTerm(
func=mdp.randomize_physics_scene_gravity,
mode="interval",
is_global_time=True,
interval_range_s=(36.0, 36.0), # time_s = num_steps * (decimation * dt)
params={
"gravity_distribution_params": ([0.0, 0.0, 0.0], [0.0, 0.0, 0.4]),
"operation": "add",
"distribution": "gaussian",
},
)
@configclass
class ShadowHandOverEnvCfg(DirectMARLEnvCfg):
# env
decimation = 2
episode_length_s = 7.5
possible_agents = ["right_hand", "left_hand"]
num_actions = {"right_hand": 20, "left_hand": 20}
num_observations = {"right_hand": 157, "left_hand": 157}
num_states = 290
# simulation
sim: SimulationCfg = SimulationCfg(
dt=1 / 120,
render_interval=decimation,
physics_material=RigidBodyMaterialCfg(
static_friction=1.0,
dynamic_friction=1.0,
),
physx=PhysxCfg(
bounce_threshold_velocity=0.2,
),
)
# robot
right_robot_cfg: ArticulationCfg = SHADOW_HAND_CFG.replace(prim_path="/World/envs/env_.*/RightRobot").replace(
init_state=ArticulationCfg.InitialStateCfg(
pos=(0.0, 0.0, 0.5),
rot=(1.0, 0.0, 0.0, 0.0),
joint_pos={".*": 0.0},
)
)
left_robot_cfg: ArticulationCfg = SHADOW_HAND_CFG.replace(prim_path="/World/envs/env_.*/LeftRobot").replace(
init_state=ArticulationCfg.InitialStateCfg(
pos=(0.0, -1.0, 0.5),
rot=(0.0, 0.0, 0.0, 1.0),
joint_pos={".*": 0.0},
)
)
actuated_joint_names = [
"robot0_WRJ1",
"robot0_WRJ0",
"robot0_FFJ3",
"robot0_FFJ2",
"robot0_FFJ1",
"robot0_MFJ3",
"robot0_MFJ2",
"robot0_MFJ1",
"robot0_RFJ3",
"robot0_RFJ2",
"robot0_RFJ1",
"robot0_LFJ4",
"robot0_LFJ3",
"robot0_LFJ2",
"robot0_LFJ1",
"robot0_THJ4",
"robot0_THJ3",
"robot0_THJ2",
"robot0_THJ1",
"robot0_THJ0",
]
fingertip_body_names = [
"robot0_ffdistal",
"robot0_mfdistal",
"robot0_rfdistal",
"robot0_lfdistal",
"robot0_thdistal",
]
# in-hand object
object_cfg: RigidObjectCfg = RigidObjectCfg(
prim_path="/World/envs/env_.*/object",
spawn=sim_utils.SphereCfg(
radius=0.0335,
visual_material=sim_utils.PreviewSurfaceCfg(diffuse_color=(0.8, 1.0, 0.0)),
physics_material=sim_utils.RigidBodyMaterialCfg(static_friction=0.7),
rigid_props=sim_utils.RigidBodyPropertiesCfg(
kinematic_enabled=False,
disable_gravity=False,
enable_gyroscopic_forces=True,
solver_position_iteration_count=8,
solver_velocity_iteration_count=0,
sleep_threshold=0.005,
stabilization_threshold=0.0025,
max_depenetration_velocity=1000.0,
),
collision_props=sim_utils.CollisionPropertiesCfg(),
mass_props=sim_utils.MassPropertiesCfg(density=500.0),
),
init_state=RigidObjectCfg.InitialStateCfg(pos=(0.0, -0.39, 0.54), rot=(1.0, 0.0, 0.0, 0.0)),
)
# goal object
goal_object_cfg: VisualizationMarkersCfg = VisualizationMarkersCfg(
prim_path="/Visuals/goal_marker",
markers={
"goal": sim_utils.SphereCfg(
radius=0.0335,
visual_material=sim_utils.PreviewSurfaceCfg(diffuse_color=(0.4, 0.3, 1.0)),
),
},
)
# scene
scene: InteractiveSceneCfg = InteractiveSceneCfg(num_envs=2048, env_spacing=1.5, replicate_physics=True)
# reset
reset_position_noise = 0.01 # range of position at reset
reset_dof_pos_noise = 0.2 # range of dof pos at reset
reset_dof_vel_noise = 0.0 # range of dof vel at reset
# scales and constants
fall_dist = 0.24
vel_obs_scale = 0.2
act_moving_average = 1.0
# reward-related scales
dist_reward_scale = 20.0
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment