Unverified Commit 0bde6fb2 authored by David Hoeller's avatar David Hoeller Committed by GitHub

Add reach environment for Franka and UR10 (#244)

# Description

This MR fixes the reach environment for end-effector pose tracking to
the new structure. It adds support for two robots: Franka-Panda and
UR10. The training works with RSL-RL and RL-Games.

## Type of change

- New feature (non-breaking change which adds functionality)

## Checklist

- [x] I have run the [`pre-commit` checks](https://pre-commit.com/) with
`./orbit.sh --format`
- [ ] I have made corresponding changes to the documentation
- [x] My changes generate no new warnings
- [ ] I have added tests that prove my fix is effective or that my
feature works
- [ ] I have updated the changelog and the corresponding version in the
extension's `config/extension.toml` file
- [x] I have added my name to the `CONTRIBUTORS.md` or my name already
exists there

---------
Co-authored-by: 's avatarMayank Mittal <mittalma@leggedrobotics.com>
parent 39f4e96e
[package]
# Note: Semantic Versioning is used: https://semver.org/
version = "0.9.44"
version = "0.9.45"
# Description
title = "ORBIT framework for Robot Learning"
......
Changelog
---------
0.9.45 (2023-11-24)
~~~~~~~~~~~~~~~~~~~
Added
^^^^^
* Added :class:`omni.isaac.orbit.command_generators.UniformPoseCommandGenerator` to generate
poses in the asset's root frame by uniformly sampling from a given range.
0.9.44 (2023-11-16)
~~~~~~~~~~~~~~~~~~~
......
......@@ -23,15 +23,20 @@ from .command_generator_cfg import (
NormalVelocityCommandGeneratorCfg,
NullCommandGeneratorCfg,
TerrainBasedPositionCommandGeneratorCfg,
UniformPoseCommandGeneratorCfg,
UniformVelocityCommandGeneratorCfg,
)
from .null_command_generator import NullCommandGenerator
from .pose_command_generator import UniformPoseCommandGenerator
from .position_command_generator import TerrainBasedPositionCommandGenerator
from .velocity_command_generator import NormalVelocityCommandGenerator, UniformVelocityCommandGenerator
__all__ = [
"CommandGeneratorBase",
"CommandGeneratorBaseCfg",
# pose command generators
"UniformPoseCommandGenerator",
"UniformPoseCommandGeneratorCfg",
# null command generator
"NullCommandGenerator",
"NullCommandGeneratorCfg",
......
......@@ -12,6 +12,7 @@ from omni.isaac.orbit.utils import configclass
from .command_generator_base import CommandGeneratorBase
from .null_command_generator import NullCommandGenerator
from .pose_command_generator import UniformPoseCommandGenerator
from .position_command_generator import TerrainBasedPositionCommandGenerator
from .velocity_command_generator import NormalVelocityCommandGenerator, UniformVelocityCommandGenerator
......@@ -123,6 +124,32 @@ class NormalVelocityCommandGeneratorCfg(UniformVelocityCommandGeneratorCfg):
"""Distribution ranges for the velocity commands."""
@configclass
class UniformPoseCommandGeneratorCfg(CommandGeneratorBaseCfg):
"""Configuration for uniform pose command generator."""
class_type: type = UniformPoseCommandGenerator
asset_name: str = MISSING
"""Name of the asset in the environment for which the commands are generated."""
body_name: str = MISSING
"""Name of the body in the asset for which the commands are generated."""
@configclass
class Ranges:
"""Uniform distribution ranges for the pose commands."""
pos_x: tuple[float, float] = MISSING # min max [m]
pos_y: tuple[float, float] = MISSING # min max [m]
pos_z: tuple[float, float] = MISSING # min max [m]
roll: tuple[float, float] = MISSING # min max [rad]
pitch: tuple[float, float] = MISSING # min max [rad]
yaw: tuple[float, float] = MISSING # min max [rad]
ranges: Ranges = MISSING
"""Ranges for the commands."""
@configclass
class TerrainBasedPositionCommandGeneratorCfg(CommandGeneratorBaseCfg):
"""Configuration for the terrain-based position command generator."""
......
# Copyright (c) 2022-2023, The ORBIT Project Developers.
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
"""Sub-module containing command generators for pose tracking."""
from __future__ import annotations
import torch
from typing import TYPE_CHECKING, Sequence
from omni.isaac.orbit.assets import Articulation
from omni.isaac.orbit.markers import VisualizationMarkers
from omni.isaac.orbit.markers.config import FRAME_MARKER_CFG
from omni.isaac.orbit.utils.math import combine_frame_transforms, compute_pose_error, quat_from_euler_xyz
from .command_generator_base import CommandGeneratorBase
if TYPE_CHECKING:
from omni.isaac.orbit.envs import BaseEnv
from .command_generator_cfg import UniformPoseCommandGeneratorCfg
class UniformPoseCommandGenerator(CommandGeneratorBase):
"""Command generator for generating pose commands uniformly.
The command generator generates poses by sampling positions uniformly within specified
regions in cartesian space. For orientation, it samples uniformly the euler angles
(roll-pitch-yaw) and converts them into quaternion representation (w, x, y, z).
The position and orientation commands are generated in the base frame of the robot, and not the
simulation world frame. This means that users need to handle the transformation from the
base frame to the simulation world frame themselves.
.. caution::
Sampling orientations uniformly is not strictly the same as sampling euler angles uniformly.
This is because rotations are defined by 3D non-Euclidean space, and the mapping
from euler angles to rotations is not one-to-one.
"""
cfg: UniformPoseCommandGeneratorCfg
"""Configuration for the command generator."""
def __init__(self, cfg: UniformPoseCommandGeneratorCfg, env: BaseEnv):
"""Initialize the command generator class.
Args:
cfg: The configuration parameters for the command generator.
env: The environment object.
"""
# initialize the base class
super().__init__(cfg, env)
# extract the robot and body index for which the command is generated
self.robot: Articulation = env.scene[cfg.asset_name]
self.body_idx = self.robot.find_bodies(cfg.body_name)[0][0]
# create buffers
# -- commands: (x, y, z, qw, qx, qy, qz) in root frame
self.pose_command_b = torch.zeros(self.num_envs, 7, device=self.device)
self.pose_command_b[:, 3] = 1.0
self.pose_command_w = torch.zeros_like(self.pose_command_b)
# -- metrics
self.metrics["position_error"] = torch.zeros(self.num_envs, device=self.device)
self.metrics["orientation_error"] = torch.zeros(self.num_envs, device=self.device)
def __str__(self) -> str:
msg = "PoseCommandGenerator:\n"
msg += f"\tCommand dimension: {tuple(self.command.shape[1:])}\n"
msg += f"\tResampling time range: {self.cfg.resampling_time_range}\n"
return msg
"""
Properties
"""
@property
def command(self) -> torch.Tensor:
"""The desired pose command. Shape is (num_envs, 7).
The first three elements correspond to the position, followed by the quaternion orientation in (w, x, y, z).
"""
return self.pose_command_b
"""
Implementation specific functions.
"""
def _resample_command(self, env_ids: Sequence[int]):
# sample new pose targets
# -- position
r = torch.empty(len(env_ids), device=self.device)
self.pose_command_b[env_ids, 0] = r.uniform_(*self.cfg.ranges.pos_x)
self.pose_command_b[env_ids, 1] = r.uniform_(*self.cfg.ranges.pos_y)
self.pose_command_b[env_ids, 2] = r.uniform_(*self.cfg.ranges.pos_z)
# -- orientation
euler_angles = torch.zeros_like(self.pose_command_b[env_ids, :3])
euler_angles[:, 0].uniform_(*self.cfg.ranges.roll)
euler_angles[:, 1].uniform_(*self.cfg.ranges.pitch)
euler_angles[:, 2].uniform_(*self.cfg.ranges.yaw)
self.pose_command_b[env_ids, 3:] = quat_from_euler_xyz(
euler_angles[:, 0], euler_angles[:, 1], euler_angles[:, 2]
)
def _update_command(self):
pass
def _update_metrics(self):
# transform command from base frame to simulation world frame
self.pose_command_w[:, :3], self.pose_command_w[:, 3:] = combine_frame_transforms(
self.robot.data.root_pos_w,
self.robot.data.root_quat_w,
self.pose_command_b[:, :3],
self.pose_command_b[:, 3:],
)
# compute the error
pos_error, rot_error = compute_pose_error(
self.pose_command_w[:, :3],
self.pose_command_w[:, 3:],
self.robot.data.body_state_w[:, self.body_idx, :3],
self.robot.data.body_state_w[:, self.body_idx, 3:7],
)
self.metrics["position_error"] = torch.norm(pos_error, dim=-1)
self.metrics["orientation_error"] = torch.norm(rot_error, dim=-1)
def _set_debug_vis_impl(self, debug_vis: bool):
# create markers if necessary for the first tome
if debug_vis:
if not hasattr(self, "goal_pose_visualizer"):
marker_cfg = FRAME_MARKER_CFG.copy()
marker_cfg.markers["frame"].scale = (0.1, 0.1, 0.1)
# -- goal pose
marker_cfg.prim_path = "/Visuals/Command/goal_pose"
self.goal_pose_visualizer = VisualizationMarkers(marker_cfg)
# -- current body pose
marker_cfg.prim_path = "/Visuals/Command/body_pose"
self.body_pose_visualizer = VisualizationMarkers(marker_cfg)
# set their visibility to true
self.goal_pose_visualizer.set_visibility(True)
self.body_pose_visualizer.set_visibility(True)
else:
if hasattr(self, "goal_pose_visualizer"):
self.goal_pose_visualizer.set_visibility(False)
self.body_pose_visualizer.set_visibility(False)
def _debug_vis_callback(self, event):
# update the markers
# -- goal pose
self.goal_pose_visualizer.visualize(self.pose_command_w[:, :3], self.pose_command_w[:, 3:])
# -- current body pose
body_pose_w = self.robot.data.body_state_w[:, self.body_idx]
self.body_pose_visualizer.visualize(body_pose_w[:, :3], body_pose_w[:, 3:7])
......@@ -251,6 +251,9 @@ class ActionManager(ManagerBase):
else:
cfg_items = self.cfg.__dict__.items()
for term_name, term_cfg in cfg_items:
# check if term config is None
if term_cfg is None:
continue
# check valid type
if not isinstance(term_cfg, ActionTermCfg):
raise TypeError(
......
......@@ -42,6 +42,7 @@ __all__ = [
"quat_from_matrix",
"quat_apply_yaw",
"quat_box_minus",
"quat_error_magnitude",
"yaw_quat",
"euler_xyz_from_quat",
"axis_angle_from_quat",
......@@ -496,9 +497,8 @@ def axis_angle_from_quat(quat: torch.Tensor, eps: float = 1.0e-6) -> torch.Tenso
eps: The tolerance for Taylor approximation. Defaults to 1.0e-6.
Returns:
Rotations given as a vector in axis angle form, as a tensor
of shape (..., 3), where the magnitude is the angle turned
anti-clockwise in radians around the vector's direction.
Rotations given as a vector in axis angle form, as a tensor of shape (..., 3),
where the magnitude is the angle turned anti-clockwise in radians around the vector's direction.
Reference:
Based on PyTorch3D (https://github.com/facebookresearch/pytorch3d/blob/main/pytorch3d/transforms/rotation_conversions.py#L526-L554)
......@@ -520,6 +520,21 @@ def axis_angle_from_quat(quat: torch.Tensor, eps: float = 1.0e-6) -> torch.Tenso
return quat[..., 1:4] / sin_half_angles_over_angles.unsqueeze(-1)
@torch.jit.script
def quat_error_magnitude(q1: torch.Tensor, q2: torch.Tensor) -> torch.Tensor:
"""Computes the rotation difference between two quaternions.
Args:
q1: A (N, 4) tensor for quaternion (w, x, y, z).
q2: A (N, 4) tensor for quaternion (w, x, y, z).
Returns:
Angular error between input quaternions in radians.
"""
quat_diff = quat_mul(q1, quat_conjugate(q2))
return torch.norm(axis_angle_from_quat(quat_diff), dim=1)
"""
Transformations
"""
......@@ -626,6 +641,9 @@ def compute_pose_error(
Returns:
A tuple containing position and orientation error.
- If :attr:`rot_error_type` is "quat", the orientation error is returned as a quaternion.
- If :attr:`rot_error_type` is "axis_angle", the orientation error is returned as an axis-angle vector.
Raises:
ValueError: Invalid rotation error type.
"""
......
......@@ -49,6 +49,6 @@ __version__ = ORBIT_TASKS_METADATA["package"]["version"]
from .utils import import_packages
# The blacklist is used to prevent importing configs from sub-packages
_BLACKLIST_PKGS = ["locomotion.velocity.config.anymal_d", "manipulation", "utils", "classic.ant", "classic.humanoid"]
_BLACKLIST_PKGS = ["locomotion.velocity.config.anymal_d", "utils", "classic.ant", "classic.humanoid"]
# Import all configs in this package
import_packages(__name__, _BLACKLIST_PKGS)
......@@ -4,3 +4,5 @@
# SPDX-License-Identifier: BSD-3-Clause
"""Manipulation environments for fixed-arm robots."""
from .reach import * # noqa
......@@ -3,24 +3,4 @@
#
# SPDX-License-Identifier: BSD-3-Clause
"""Environment for end-effector pose tracking task for fixed-arm robots."""
import gymnasium as gym
from . import agents
##
# Register Gym environments.
##
gym.register(
id="Isaac-Reach-Franka-v0",
entry_point="omni.isaac.orbit.envs:RLTaskEnv",
disable_env_checker=True,
kwargs={
"env_cfg_entry_point": f"{__name__}.reach_env_cfg:ReachEnvCfg",
"rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_ppo_cfg.yaml",
"rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:LIFT_RSL_RL_PPO_CFG",
"skrl_cfg_entry_point": f"{agents.__name__}:skrl_ppo_cfg.yaml",
},
)
"""Fixed-arm environments with end-effector pose tracking commands."""
seed: 42
# Models are instantiated using skrl's model instantiator utility
# https://skrl.readthedocs.io/en/develop/modules/skrl.utils.model_instantiators.html
models:
separate: False
policy: # see skrl.utils.model_instantiators.gaussian_model for parameter details
clip_actions: False
clip_log_std: True
min_log_std: -20.0
max_log_std: 2.0
input_shape: "Shape.STATES"
hiddens: [256, 128, 64]
hidden_activation: ["elu", "elu", "elu"]
output_shape: "Shape.ACTIONS"
output_activation: "tanh"
output_scale: 1.0
value: # see skrl.utils.model_instantiators.deterministic_model for parameter details
clip_actions: False
input_shape: "Shape.STATES"
hiddens: [256, 128, 64]
hidden_activation: ["elu", "elu", "elu"]
output_shape: "Shape.ONE"
output_activation: ""
output_scale: 1.0
# PPO agent configuration (field names are from PPO_DEFAULT_CONFIG)
# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ppo.html
agent:
rollouts: 16
learning_epochs: 8
mini_batches: 8
discount_factor: 0.99
lambda: 0.95
learning_rate: 3.e-4
learning_rate_scheduler: "KLAdaptiveRL"
learning_rate_scheduler_kwargs:
kl_threshold: 0.01
state_preprocessor: "RunningStandardScaler"
state_preprocessor_kwargs: null
value_preprocessor: "RunningStandardScaler"
value_preprocessor_kwargs: null
random_timesteps: 0
learning_starts: 0
grad_norm_clip: 1.0
ratio_clip: 0.2
value_clip: 0.2
clip_predicted_values: True
entropy_loss_scale: 0.0
value_loss_scale: 2.0
kl_threshold: 0
rewards_shaper_scale: 1.0
# logging and checkpoint
experiment:
directory: "reach"
experiment_name: ""
write_interval: 40
checkpoint_interval: 400
# Sequential trainer
# https://skrl.readthedocs.io/en/latest/modules/skrl.trainers.sequential.html
trainer:
timesteps: 16000
# Copyright (c) 2022-2023, The ORBIT Project Developers.
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
"""Configurations for arm-based reach-tracking environments."""
# We leave this file empty since we don't want to expose any configs in this package directly.
# We still need this file to import the "config" module in the parent package.
# Copyright (c) 2022-2023, The ORBIT Project Developers.
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
import gymnasium as gym
from . import agents, joint_env_cfg
##
# Register Gym environments.
##
gym.register(
id="Isaac-Reach-Franka-v0",
entry_point="omni.isaac.orbit.envs:RLTaskEnv",
disable_env_checker=True,
kwargs={
"env_cfg_entry_point": joint_env_cfg.FrankaReachEnvCfg,
"rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_ppo_cfg.yaml",
"rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:FrankaReachPPORunnerCfg",
},
)
gym.register(
id="Isaac-Reach-Franka-Play-v0",
entry_point="omni.isaac.orbit.envs:RLTaskEnv",
disable_env_checker=True,
kwargs={
"env_cfg_entry_point": joint_env_cfg.FrankaReachEnvCfg_PLAY,
"rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_ppo_cfg.yaml",
"rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:FrankaReachPPORunnerCfg",
},
)
params:
seed: 42
# environment wrapper clipping
env:
clip_observations: 100.0
clip_actions: 100.0
algo:
name: a2c_continuous
model:
name: continuous_a2c_logstd
network:
name: actor_critic
separate: False
space:
continuous:
mu_activation: None
sigma_activation: None
mu_init:
name: default
sigma_init:
name: const_initializer
val: 0
fixed_sigma: True
mlp:
units: [64, 64]
activation: elu
d2rl: False
initializer:
name: default
regularizer:
name: None
load_checkpoint: False # flag which sets whether to load the checkpoint
load_path: '' # path to the checkpoint to load
config:
name: reach_franka
env_name: rlgpu
device: 'cuda:0'
device_name: 'cuda:0'
multi_gpu: False
ppo: True
mixed_precision: False
normalize_input: True
normalize_value: True
value_bootstrap: True
num_actors: -1
reward_shaper:
scale_value: 1.0
normalize_advantage: True
gamma: 0.99
tau: 0.95
learning_rate: 1e-3
lr_schedule: adaptive
schedule_type: legacy
kl_threshold: 0.01
score_to_win: 10000
max_epochs: 1000
save_best_after: 200
save_frequency: 100
print_stats: True
grad_norm: 1.0
entropy_coef: 0.01
truncate_grads: True
e_clip: 0.2
horizon_length: 24
minibatch_size: 24576
mini_epochs: 5
critic_coef: 2
clip_value: True
clip_actions: False
bounds_loss_coef: 0.0001
# Copyright (c) 2022-2023, The ORBIT Project Developers.
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
from omni.isaac.orbit.utils import configclass
from omni.isaac.orbit_tasks.utils.wrappers.rsl_rl import (
RslRlOnPolicyRunnerCfg,
RslRlPpoActorCriticCfg,
RslRlPpoAlgorithmCfg,
)
@configclass
class FrankaReachPPORunnerCfg(RslRlOnPolicyRunnerCfg):
num_steps_per_env = 24
max_iterations = 1000
save_interval = 50
experiment_name = "reach_franka"
run_name = ""
resume = False
empirical_normalization = False
policy = RslRlPpoActorCriticCfg(
init_noise_std=1.0,
actor_hidden_dims=[64, 64],
critic_hidden_dims=[64, 64],
activation="elu",
)
algorithm = RslRlPpoAlgorithmCfg(
value_loss_coef=1.0,
use_clipped_value_loss=True,
clip_param=0.2,
entropy_coef=0.01,
num_learning_epochs=8,
num_mini_batches=4,
learning_rate=1.0e-3,
schedule="adaptive",
gamma=0.99,
lam=0.95,
desired_kl=0.01,
max_grad_norm=1.0,
)
# Copyright (c) 2022-2023, The ORBIT Project Developers.
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
from __future__ import annotations
import math
from omni.isaac.orbit.utils import configclass
import omni.isaac.orbit_tasks.manipulation.reach.mdp as mdp
from omni.isaac.orbit_tasks.manipulation.reach.reach_env_cfg import ReachEnvCfg
##
# Pre-defined configs
##
# isort: off
from omni.isaac.orbit.assets.config import FRANKA_PANDA_ARM_WITH_PANDA_HAND_CFG
##
# Environment configuration
##
@configclass
class FrankaReachEnvCfg(ReachEnvCfg):
def __post_init__(self):
# post init of parent
super().__post_init__()
# switch robot to franka
self.scene.robot = FRANKA_PANDA_ARM_WITH_PANDA_HAND_CFG.replace(prim_path="{ENV_REGEX_NS}/Robot")
# override rewards
self.rewards.end_effector_position_tracking.params["asset_cfg"].body_names = ["panda_hand"]
self.rewards.end_effector_orientation_tracking.params["asset_cfg"].body_names = ["panda_hand"]
# override actions
self.actions.arm_action = mdp.JointPositionActionCfg(
asset_name="robot", joint_names=["panda_joint.*"], scale=0.5, use_default_offset=True
)
# override command generator body
# end-effector is along z-direction
self.commands.body_name = "panda_hand"
self.commands.ranges.pitch = (math.pi, math.pi)
@configclass
class FrankaReachEnvCfg_PLAY(FrankaReachEnvCfg):
def __post_init__(self):
# post init of parent
super().__post_init__()
# make a smaller scene for play
self.scene.num_envs = 50
self.scene.env_spacing = 2.5
# disable randomization for play
self.observations.policy.enable_corruption = False
# Copyright (c) 2022-2023, The ORBIT Project Developers.
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
import gymnasium as gym
from . import agents, joint_env_cfg
##
# Register Gym environments.
##
gym.register(
id="Isaac-Reach-UR10-v0",
entry_point="omni.isaac.orbit.envs:RLTaskEnv",
disable_env_checker=True,
kwargs={
"env_cfg_entry_point": joint_env_cfg.UR10ReachEnvCfg,
"rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_ppo_cfg.yaml",
"rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:UR10ReachPPORunnerCfg",
},
)
gym.register(
id="Isaac-Reach-UR10-Play-v0",
entry_point="omni.isaac.orbit.envs:RLTaskEnv",
disable_env_checker=True,
kwargs={
"env_cfg_entry_point": joint_env_cfg.UR10ReachEnvCfg_PLAY,
"rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_ppo_cfg.yaml",
"rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:UR10ReachPPORunnerCfg",
},
)
......@@ -3,8 +3,8 @@ params:
# environment wrapper clipping
env:
clip_observations: 5.0
clip_actions: 1.0
clip_observations: 100.0
clip_actions: 100.0
algo:
name: a2c_continuous
......@@ -27,7 +27,7 @@ params:
val: 0
fixed_sigma: True
mlp:
units: [256, 128, 64]
units: [64, 64]
activation: elu
d2rl: False
......@@ -40,7 +40,7 @@ params:
load_path: '' # path to the checkpoint to load
config:
name: lift
name: reach_ur10
env_name: rlgpu
device: 'cuda:0'
device_name: 'cuda:0'
......@@ -56,23 +56,23 @@ params:
normalize_advantage: True
gamma: 0.99
tau: 0.95
learning_rate: 5e-4
learning_rate: 1e-3
lr_schedule: adaptive
schedule_type: legacy
kl_threshold: 0.008
kl_threshold: 0.01
score_to_win: 10000
max_epochs: 5000
max_epochs: 1000
save_best_after: 200
save_frequency: 100
print_stats: True
grad_norm: 1.0
entropy_coef: 0.0
entropy_coef: 0.01
truncate_grads: True
e_clip: 0.2
horizon_length: 32
minibatch_size: 2048
horizon_length: 24
minibatch_size: 24576
mini_epochs: 5
critic_coef: 4
critic_coef: 2
clip_value: True
seq_len: 4
clip_actions: False
bounds_loss_coef: 0.0001
......@@ -3,40 +3,41 @@
#
# SPDX-License-Identifier: BSD-3-Clause
from omni.isaac.orbit.utils import configclass
from omni.isaac.orbit_tasks.utils.wrappers.rsl_rl import (
RslRlOnPolicyRunnerCfg,
RslRlPpoActorCriticCfg,
RslRlPpoAlgorithmCfg,
)
REACH_RSL_RL_PPO_CFG = RslRlOnPolicyRunnerCfg(
num_steps_per_env=64,
max_iterations=250,
save_interval=50,
experiment_name="reach",
run_name="",
resume=False,
load_run=-1,
load_checkpoint=-1,
empirical_normalization=False,
policy=RslRlPpoActorCriticCfg(
@configclass
class UR10ReachPPORunnerCfg(RslRlOnPolicyRunnerCfg):
num_steps_per_env = 24
max_iterations = 1000
save_interval = 50
experiment_name = "reach_ur10"
run_name = ""
resume = False
empirical_normalization = False
policy = RslRlPpoActorCriticCfg(
init_noise_std=1.0,
actor_hidden_dims=[256, 128, 64],
critic_hidden_dims=[256, 128, 64],
actor_hidden_dims=[64, 64],
critic_hidden_dims=[64, 64],
activation="elu",
),
algorithm=RslRlPpoAlgorithmCfg(
)
algorithm = RslRlPpoAlgorithmCfg(
value_loss_coef=1.0,
use_clipped_value_loss=True,
clip_param=0.2,
entropy_coef=0.01,
num_learning_epochs=8,
num_mini_batches=64,
learning_rate=3.0e-4,
num_mini_batches=4,
learning_rate=1.0e-3,
schedule="adaptive",
gamma=0.99,
lam=0.95,
desired_kl=0.01,
max_grad_norm=1.0,
),
)
)
# Copyright (c) 2022-2023, The ORBIT Project Developers.
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
from __future__ import annotations
import math
from omni.isaac.orbit.utils import configclass
import omni.isaac.orbit_tasks.manipulation.reach.mdp as mdp
from omni.isaac.orbit_tasks.manipulation.reach.reach_env_cfg import ReachEnvCfg
##
# Pre-defined configs
##
# isort: off
from omni.isaac.orbit.assets.config import UR10_CFG
##
# Environment configuration
##
@configclass
class UR10ReachEnvCfg(ReachEnvCfg):
def __post_init__(self):
# post init of parent
super().__post_init__()
# switch robot to ur10
self.scene.robot = UR10_CFG.replace(prim_path="{ENV_REGEX_NS}/Robot")
# override randomization
self.randomization.reset_robot_joints.params["position_range"] = (0.75, 1.25)
# override rewards
self.rewards.end_effector_position_tracking.params["asset_cfg"].body_names = ["ee_link"]
self.rewards.end_effector_orientation_tracking.params["asset_cfg"].body_names = ["ee_link"]
# override actions
self.actions.arm_action = mdp.JointPositionActionCfg(
asset_name="robot", joint_names=[".*"], scale=0.5, use_default_offset=True
)
# override command generator body
# end-effector is along x-direction
self.commands.body_name = "ee_link"
self.commands.ranges.pitch = (math.pi / 2, math.pi / 2)
@configclass
class UR10ReachEnvCfg_PLAY(UR10ReachEnvCfg):
def __post_init__(self):
# post init of parent
super().__post_init__()
# make a smaller scene for play
self.scene.num_envs = 50
self.scene.env_spacing = 2.5
# disable randomization for play
self.observations.policy.enable_corruption = False
# Copyright (c) 2022-2023, The ORBIT Project Developers.
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
"""This sub-module contains the functions that are specific to the locomotion environments."""
from omni.isaac.orbit.envs.mdp import * # noqa: F401, F403
from .rewards import * # noqa: F401, F403
# Copyright (c) 2022-2023, The ORBIT Project Developers.
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
from __future__ import annotations
import torch
from typing import TYPE_CHECKING
from omni.isaac.orbit.assets import RigidObject
from omni.isaac.orbit.command_generators import UniformPoseCommandGenerator
from omni.isaac.orbit.managers import SceneEntityCfg
from omni.isaac.orbit.utils.math import combine_frame_transforms, quat_error_magnitude, quat_mul
if TYPE_CHECKING:
from omni.isaac.orbit.envs import RLTaskEnv
def position_command_error(env: RLTaskEnv, asset_cfg: SceneEntityCfg) -> torch.Tensor:
"""Penalize tracking of the position error using L2-norm.
The function computes the position error between the desired position (from the command) and the
current position of the asset's body (in world frame). The position error is computed as the L2-norm
of the difference between the desired and current positions.
"""
# extract the asset (to enable type hinting)
asset: RigidObject = env.scene[asset_cfg.name]
command_manager: UniformPoseCommandGenerator = env.command_manager
# obtain the desired and current positions
des_pos_b = command_manager.command[:, :3]
des_pos_w, _ = combine_frame_transforms(asset.data.root_state_w[:, :3], asset.data.root_state_w[:, 3:7], des_pos_b)
curr_pos_w = asset.data.body_state_w[:, asset_cfg.body_ids[0], :3] # type: ignore
return torch.norm(curr_pos_w - des_pos_w, dim=1)
def orientation_command_error(env: RLTaskEnv, asset_cfg: SceneEntityCfg) -> torch.Tensor:
"""Penalize tracking orientation error using shortest path.
The function computes the orientation error between the desired orientation (from the command) and the
current orientation of the asset's body (in world frame). The orientation error is computed as the shortest
path between the desired and current orientations.
"""
# extract the asset (to enable type hinting)
asset: RigidObject = env.scene[asset_cfg.name]
command_manager: UniformPoseCommandGenerator = env.command_manager
# obtain the desired and current orientations
des_quat_b = command_manager.command[:, 3:7]
des_quat_w = quat_mul(asset.data.root_state_w[:, 3:7], des_quat_b)
curr_quat_w = asset.data.body_state_w[:, asset_cfg.body_ids[0], 3:7] # type: ignore
return quat_error_magnitude(curr_quat_w, des_quat_w)
# Copyright (c) 2022-2023, The ORBIT Project Developers.
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
from __future__ import annotations
from omni.isaac.orbit.controllers.differential_inverse_kinematics import DifferentialInverseKinematicsCfg
from omni.isaac.orbit.robots.config.franka import FRANKA_PANDA_ARM_WITH_PANDA_HAND_CFG
from omni.isaac.orbit.robots.single_arm import SingleArmManipulatorCfg
from omni.isaac.orbit.utils import configclass
from omni.isaac.orbit.utils.assets import ISAAC_NUCLEUS_DIR
from omni.isaac.orbit_tasks.isaac_env_cfg import EnvCfg, IsaacEnvCfg, SimCfg, ViewerCfg
##
# Scene settings
##
@configclass
class TableCfg:
"""Properties for the table."""
# note: we use instanceable asset since it consumes less memory
usd_path = f"{ISAAC_NUCLEUS_DIR}/Props/Mounts/SeattleLabTable/table_instanceable.usd"
@configclass
class MarkerCfg:
"""Properties for visualization marker."""
# usd file to import
usd_path = f"{ISAAC_NUCLEUS_DIR}/Props/UIElements/frame_prim.usd"
# scale of the asset at import
scale = [0.1, 0.1, 0.1] # x,y,z
##
# MDP settings
##
@configclass
class RandomizationCfg:
"""Randomization of scene at reset."""
@configclass
class EndEffectorDesiredPoseCfg:
"""Randomization of end-effector pose command."""
# category
position_cat: str = "default" # randomize position: "default", "uniform"
orientation_cat: str = "default" # randomize position: "default", "uniform"
# randomize position
position_default = [0.5, 0.0, 0.5] # position default (x,y,z)
position_uniform_min = [0.25, -0.25, 0.25] # position (x,y,z)
position_uniform_max = [0.5, 0.25, 0.5] # position (x,y,z)
# randomize orientation
orientation_default = [1.0, 0.0, 0.0, 0.0] # orientation default
# initialize
ee_desired_pose: EndEffectorDesiredPoseCfg = EndEffectorDesiredPoseCfg()
@configclass
class ObservationsCfg:
"""Observation specifications for the MDP."""
@configclass
class PolicyCfg:
"""Observations for policy group."""
# global group settings
enable_corruption: bool = True
# observation terms
arm_dof_pos_normalized = {"scale": 1.0, "noise": {"name": "uniform", "min": -0.01, "max": 0.01}}
arm_dof_vel = {"scale": 0.5, "noise": {"name": "uniform", "min": -0.1, "max": 0.1}}
ee_position = {}
ee_position_command = {}
actions = {}
# global observation settings
return_dict_obs_in_group = False
"""Whether to return observations as dictionary or flattened vector within groups."""
# observation groups
policy: PolicyCfg = PolicyCfg()
@configclass
class RewardsCfg:
"""Reward terms for the MDP."""
tracking_robot_position_l2 = {"weight": 0.0}
tracking_robot_position_exp = {"weight": 2.5, "sigma": 0.05} # 0.25
penalizing_robot_dof_velocity_l2 = {"weight": -0.02} # -1e-4
penalizing_robot_dof_acceleration_l2 = {"weight": -1e-5}
penalizing_action_rate_l2 = {"weight": -0.1}
@configclass
class TerminationsCfg:
"""Termination terms for the MDP."""
episode_timeout = True # reset when episode length ended
@configclass
class ControlCfg:
"""Processing of MDP actions."""
# action space
control_type = "default" # "default", "inverse_kinematics"
# decimation: Number of control action updates @ sim dt per policy dt
decimation = 2
# configuration loaded when control_type == "inverse_kinematics"
inverse_kinematics: DifferentialInverseKinematicsCfg = DifferentialInverseKinematicsCfg(
command_type="pose_rel",
ik_method="dls",
position_command_scale=(0.1, 0.1, 0.1),
rotation_command_scale=(0.1, 0.1, 0.1),
)
##
# Environment configuration
##
@configclass
class ReachEnvCfg(IsaacEnvCfg):
"""Configuration for the reach environment."""
# General Settings
env: EnvCfg = EnvCfg(num_envs=2048, env_spacing=2.5, episode_length_s=4.0)
viewer: ViewerCfg = ViewerCfg(debug_vis=True, eye=(7.5, 7.5, 7.5), lookat=(0.0, 0.0, 0.0))
# Physics settings
sim: SimCfg = SimCfg(dt=1.0 / 60.0, substeps=1)
# Scene Settings
robot: SingleArmManipulatorCfg = FRANKA_PANDA_ARM_WITH_PANDA_HAND_CFG
table: TableCfg = TableCfg()
marker: MarkerCfg = MarkerCfg()
# MDP settings
randomization: RandomizationCfg = RandomizationCfg()
observations: ObservationsCfg = ObservationsCfg()
rewards: RewardsCfg = RewardsCfg()
terminations: TerminationsCfg = TerminationsCfg()
# Controller settings
control: ControlCfg = ControlCfg()
# Copyright (c) 2022-2023, The ORBIT Project Developers.
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
from __future__ import annotations
from dataclasses import MISSING
import omni.isaac.orbit.sim as sim_utils
from omni.isaac.orbit.assets import ArticulationCfg, AssetBaseCfg
from omni.isaac.orbit.command_generators import UniformPoseCommandGeneratorCfg
from omni.isaac.orbit.envs import RLTaskEnvCfg
from omni.isaac.orbit.managers import ActionTermCfg as ActionTerm
from omni.isaac.orbit.managers import CurriculumTermCfg as CurrTerm
from omni.isaac.orbit.managers import ObservationGroupCfg as ObsGroup
from omni.isaac.orbit.managers import ObservationTermCfg as ObsTerm
from omni.isaac.orbit.managers import RandomizationTermCfg as RandTerm
from omni.isaac.orbit.managers import RewardTermCfg as RewTerm
from omni.isaac.orbit.managers import SceneEntityCfg
from omni.isaac.orbit.managers import TerminationTermCfg as DoneTerm
from omni.isaac.orbit.scene import InteractiveSceneCfg
from omni.isaac.orbit.utils import configclass
from omni.isaac.orbit.utils.assets import ISAAC_NUCLEUS_DIR
from omni.isaac.orbit.utils.noise import AdditiveUniformNoiseCfg as Unoise
import omni.isaac.orbit_tasks.manipulation.reach.mdp as mdp
##
# Scene definition
##
@configclass
class ReachSceneCfg(InteractiveSceneCfg):
"""Configuration for the scene with a robotic arm."""
# world
ground = AssetBaseCfg(
prim_path="/World/ground",
spawn=sim_utils.GroundPlaneCfg(),
init_state=AssetBaseCfg.InitialStateCfg(pos=(0.0, 0.0, -1.05)),
)
table = AssetBaseCfg(
prim_path="{ENV_REGEX_NS}/Table",
spawn=sim_utils.UsdFileCfg(
usd_path=f"{ISAAC_NUCLEUS_DIR}/Props/Mounts/SeattleLabTable/table_instanceable.usd",
),
init_state=AssetBaseCfg.InitialStateCfg(pos=(0.55, 0.0, 0.0), rot=(0.70711, 0.0, 0.0, 0.70711)),
)
# robots
robot: ArticulationCfg = MISSING
# lights
light = AssetBaseCfg(
prim_path="/World/light",
spawn=sim_utils.DomeLightCfg(color=(0.75, 0.75, 0.75), intensity=2500.0),
)
##
# MDP settings
##
@configclass
class ActionsCfg:
"""Action specifications for the MDP."""
arm_action: ActionTerm = MISSING
gripper_action: ActionTerm | None = None
@configclass
class ObservationsCfg:
"""Observation specifications for the MDP."""
@configclass
class PolicyCfg(ObsGroup):
"""Observations for policy group."""
# observation terms (order preserved)
joint_pos = ObsTerm(func=mdp.joint_pos_rel, noise=Unoise(n_min=-0.01, n_max=0.01))
joint_vel = ObsTerm(func=mdp.joint_vel_rel, noise=Unoise(n_min=-0.01, n_max=0.01))
pose_command = ObsTerm(func=mdp.generated_commands)
actions = ObsTerm(func=mdp.last_action)
def __post_init__(self):
self.enable_corruption = True
self.concatenate_terms = True
# observation groups
policy: PolicyCfg = PolicyCfg()
@configclass
class RandomizationCfg:
"""Configuration for randomization."""
reset_robot_joints = RandTerm(
func=mdp.reset_joints_by_scale,
mode="reset",
params={
"position_range": (0.5, 1.5),
"velocity_range": (0.0, 0.0),
},
)
@configclass
class RewardsCfg:
"""Reward terms for the MDP."""
# task terms
end_effector_position_tracking = RewTerm(
func=mdp.position_command_error,
weight=-0.2,
params={"asset_cfg": SceneEntityCfg("robot", body_names=MISSING)},
)
end_effector_orientation_tracking = RewTerm(
func=mdp.orientation_command_error,
weight=-0.05,
params={"asset_cfg": SceneEntityCfg("robot", body_names=MISSING)},
)
# action penalty
action_rate = RewTerm(func=mdp.action_rate_l2, weight=-0.005)
joint_vel = RewTerm(
func=mdp.joint_vel_l2,
weight=-0.0001,
params={"asset_cfg": SceneEntityCfg("robot")},
)
@configclass
class TerminationsCfg:
"""Termination terms for the MDP."""
time_out = DoneTerm(func=mdp.time_out, time_out=True)
@configclass
class CurriculumCfg:
"""Curriculum terms for the MDP."""
action_rate = CurrTerm(
func=mdp.modify_reward_weight, params={"term_name": "action_rate", "weight": -0.01, "num_steps": 4500}
)
joint_vel = CurrTerm(
func=mdp.modify_reward_weight, params={"term_name": "joint_vel", "weight": -0.0001, "num_steps": 4500}
)
##
# Environment configuration
##
@configclass
class ReachEnvCfg(RLTaskEnvCfg):
"""Configuration for the reach end-effector pose tracking environment."""
# Scene settings
scene: ReachSceneCfg = ReachSceneCfg(num_envs=4096, env_spacing=2.5, replicate_physics=False)
# Basic settings
observations: ObservationsCfg = ObservationsCfg()
actions: ActionsCfg = ActionsCfg()
commands: UniformPoseCommandGeneratorCfg = UniformPoseCommandGeneratorCfg(
asset_name="robot",
body_name=MISSING,
resampling_time_range=(4.0, 4.0),
debug_vis=False,
ranges=UniformPoseCommandGeneratorCfg.Ranges(
pos_x=(0.35, 0.65),
pos_y=(-0.2, 0.2),
pos_z=(0.15, 0.5),
roll=(0.0, 0.0),
pitch=MISSING, # depends on end-effector axis
yaw=(-3.14, 3.14),
),
)
# MDP settings
rewards: RewardsCfg = RewardsCfg()
terminations: TerminationsCfg = TerminationsCfg()
randomization: RandomizationCfg = RandomizationCfg()
curriculum: CurriculumCfg = CurriculumCfg()
def __post_init__(self):
"""Post initialization."""
# general settings
self.decimation = 2
self.episode_length_s = 12.0
self.viewer.eye = (3.5, 3.5, 3.5)
# simulation settings
self.sim.dt = 1.0 / 60.0
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment