Unverified Commit 0bde6fb2 authored by David Hoeller's avatar David Hoeller Committed by GitHub

Add reach environment for Franka and UR10 (#244)

# Description

This MR fixes the reach environment for end-effector pose tracking to
the new structure. It adds support for two robots: Franka-Panda and
UR10. The training works with RSL-RL and RL-Games.

## Type of change

- New feature (non-breaking change which adds functionality)

## Checklist

- [x] I have run the [`pre-commit` checks](https://pre-commit.com/) with
`./orbit.sh --format`
- [ ] I have made corresponding changes to the documentation
- [x] My changes generate no new warnings
- [ ] I have added tests that prove my fix is effective or that my
feature works
- [ ] I have updated the changelog and the corresponding version in the
extension's `config/extension.toml` file
- [x] I have added my name to the `CONTRIBUTORS.md` or my name already
exists there

---------
Co-authored-by: 's avatarMayank Mittal <mittalma@leggedrobotics.com>
parent 39f4e96e
[package]
# Note: Semantic Versioning is used: https://semver.org/
version = "0.9.44"
version = "0.9.45"
# Description
title = "ORBIT framework for Robot Learning"
......
Changelog
---------
0.9.45 (2023-11-24)
~~~~~~~~~~~~~~~~~~~
Added
^^^^^
* Added :class:`omni.isaac.orbit.command_generators.UniformPoseCommandGenerator` to generate
poses in the asset's root frame by uniformly sampling from a given range.
0.9.44 (2023-11-16)
~~~~~~~~~~~~~~~~~~~
......
......@@ -23,15 +23,20 @@ from .command_generator_cfg import (
NormalVelocityCommandGeneratorCfg,
NullCommandGeneratorCfg,
TerrainBasedPositionCommandGeneratorCfg,
UniformPoseCommandGeneratorCfg,
UniformVelocityCommandGeneratorCfg,
)
from .null_command_generator import NullCommandGenerator
from .pose_command_generator import UniformPoseCommandGenerator
from .position_command_generator import TerrainBasedPositionCommandGenerator
from .velocity_command_generator import NormalVelocityCommandGenerator, UniformVelocityCommandGenerator
__all__ = [
"CommandGeneratorBase",
"CommandGeneratorBaseCfg",
# pose command generators
"UniformPoseCommandGenerator",
"UniformPoseCommandGeneratorCfg",
# null command generator
"NullCommandGenerator",
"NullCommandGeneratorCfg",
......
......@@ -12,6 +12,7 @@ from omni.isaac.orbit.utils import configclass
from .command_generator_base import CommandGeneratorBase
from .null_command_generator import NullCommandGenerator
from .pose_command_generator import UniformPoseCommandGenerator
from .position_command_generator import TerrainBasedPositionCommandGenerator
from .velocity_command_generator import NormalVelocityCommandGenerator, UniformVelocityCommandGenerator
......@@ -123,6 +124,32 @@ class NormalVelocityCommandGeneratorCfg(UniformVelocityCommandGeneratorCfg):
"""Distribution ranges for the velocity commands."""
@configclass
class UniformPoseCommandGeneratorCfg(CommandGeneratorBaseCfg):
"""Configuration for uniform pose command generator."""
class_type: type = UniformPoseCommandGenerator
asset_name: str = MISSING
"""Name of the asset in the environment for which the commands are generated."""
body_name: str = MISSING
"""Name of the body in the asset for which the commands are generated."""
@configclass
class Ranges:
"""Uniform distribution ranges for the pose commands."""
pos_x: tuple[float, float] = MISSING # min max [m]
pos_y: tuple[float, float] = MISSING # min max [m]
pos_z: tuple[float, float] = MISSING # min max [m]
roll: tuple[float, float] = MISSING # min max [rad]
pitch: tuple[float, float] = MISSING # min max [rad]
yaw: tuple[float, float] = MISSING # min max [rad]
ranges: Ranges = MISSING
"""Ranges for the commands."""
@configclass
class TerrainBasedPositionCommandGeneratorCfg(CommandGeneratorBaseCfg):
"""Configuration for the terrain-based position command generator."""
......
# Copyright (c) 2022-2023, The ORBIT Project Developers.
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
"""Sub-module containing command generators for pose tracking."""
from __future__ import annotations
import torch
from typing import TYPE_CHECKING, Sequence
from omni.isaac.orbit.assets import Articulation
from omni.isaac.orbit.markers import VisualizationMarkers
from omni.isaac.orbit.markers.config import FRAME_MARKER_CFG
from omni.isaac.orbit.utils.math import combine_frame_transforms, compute_pose_error, quat_from_euler_xyz
from .command_generator_base import CommandGeneratorBase
if TYPE_CHECKING:
from omni.isaac.orbit.envs import BaseEnv
from .command_generator_cfg import UniformPoseCommandGeneratorCfg
class UniformPoseCommandGenerator(CommandGeneratorBase):
"""Command generator for generating pose commands uniformly.
The command generator generates poses by sampling positions uniformly within specified
regions in cartesian space. For orientation, it samples uniformly the euler angles
(roll-pitch-yaw) and converts them into quaternion representation (w, x, y, z).
The position and orientation commands are generated in the base frame of the robot, and not the
simulation world frame. This means that users need to handle the transformation from the
base frame to the simulation world frame themselves.
.. caution::
Sampling orientations uniformly is not strictly the same as sampling euler angles uniformly.
This is because rotations are defined by 3D non-Euclidean space, and the mapping
from euler angles to rotations is not one-to-one.
"""
cfg: UniformPoseCommandGeneratorCfg
"""Configuration for the command generator."""
def __init__(self, cfg: UniformPoseCommandGeneratorCfg, env: BaseEnv):
"""Initialize the command generator class.
Args:
cfg: The configuration parameters for the command generator.
env: The environment object.
"""
# initialize the base class
super().__init__(cfg, env)
# extract the robot and body index for which the command is generated
self.robot: Articulation = env.scene[cfg.asset_name]
self.body_idx = self.robot.find_bodies(cfg.body_name)[0][0]
# create buffers
# -- commands: (x, y, z, qw, qx, qy, qz) in root frame
self.pose_command_b = torch.zeros(self.num_envs, 7, device=self.device)
self.pose_command_b[:, 3] = 1.0
self.pose_command_w = torch.zeros_like(self.pose_command_b)
# -- metrics
self.metrics["position_error"] = torch.zeros(self.num_envs, device=self.device)
self.metrics["orientation_error"] = torch.zeros(self.num_envs, device=self.device)
def __str__(self) -> str:
msg = "PoseCommandGenerator:\n"
msg += f"\tCommand dimension: {tuple(self.command.shape[1:])}\n"
msg += f"\tResampling time range: {self.cfg.resampling_time_range}\n"
return msg
"""
Properties
"""
@property
def command(self) -> torch.Tensor:
"""The desired pose command. Shape is (num_envs, 7).
The first three elements correspond to the position, followed by the quaternion orientation in (w, x, y, z).
"""
return self.pose_command_b
"""
Implementation specific functions.
"""
def _resample_command(self, env_ids: Sequence[int]):
# sample new pose targets
# -- position
r = torch.empty(len(env_ids), device=self.device)
self.pose_command_b[env_ids, 0] = r.uniform_(*self.cfg.ranges.pos_x)
self.pose_command_b[env_ids, 1] = r.uniform_(*self.cfg.ranges.pos_y)
self.pose_command_b[env_ids, 2] = r.uniform_(*self.cfg.ranges.pos_z)
# -- orientation
euler_angles = torch.zeros_like(self.pose_command_b[env_ids, :3])
euler_angles[:, 0].uniform_(*self.cfg.ranges.roll)
euler_angles[:, 1].uniform_(*self.cfg.ranges.pitch)
euler_angles[:, 2].uniform_(*self.cfg.ranges.yaw)
self.pose_command_b[env_ids, 3:] = quat_from_euler_xyz(
euler_angles[:, 0], euler_angles[:, 1], euler_angles[:, 2]
)
def _update_command(self):
pass
def _update_metrics(self):
# transform command from base frame to simulation world frame
self.pose_command_w[:, :3], self.pose_command_w[:, 3:] = combine_frame_transforms(
self.robot.data.root_pos_w,
self.robot.data.root_quat_w,
self.pose_command_b[:, :3],
self.pose_command_b[:, 3:],
)
# compute the error
pos_error, rot_error = compute_pose_error(
self.pose_command_w[:, :3],
self.pose_command_w[:, 3:],
self.robot.data.body_state_w[:, self.body_idx, :3],
self.robot.data.body_state_w[:, self.body_idx, 3:7],
)
self.metrics["position_error"] = torch.norm(pos_error, dim=-1)
self.metrics["orientation_error"] = torch.norm(rot_error, dim=-1)
def _set_debug_vis_impl(self, debug_vis: bool):
# create markers if necessary for the first tome
if debug_vis:
if not hasattr(self, "goal_pose_visualizer"):
marker_cfg = FRAME_MARKER_CFG.copy()
marker_cfg.markers["frame"].scale = (0.1, 0.1, 0.1)
# -- goal pose
marker_cfg.prim_path = "/Visuals/Command/goal_pose"
self.goal_pose_visualizer = VisualizationMarkers(marker_cfg)
# -- current body pose
marker_cfg.prim_path = "/Visuals/Command/body_pose"
self.body_pose_visualizer = VisualizationMarkers(marker_cfg)
# set their visibility to true
self.goal_pose_visualizer.set_visibility(True)
self.body_pose_visualizer.set_visibility(True)
else:
if hasattr(self, "goal_pose_visualizer"):
self.goal_pose_visualizer.set_visibility(False)
self.body_pose_visualizer.set_visibility(False)
def _debug_vis_callback(self, event):
# update the markers
# -- goal pose
self.goal_pose_visualizer.visualize(self.pose_command_w[:, :3], self.pose_command_w[:, 3:])
# -- current body pose
body_pose_w = self.robot.data.body_state_w[:, self.body_idx]
self.body_pose_visualizer.visualize(body_pose_w[:, :3], body_pose_w[:, 3:7])
......@@ -251,6 +251,9 @@ class ActionManager(ManagerBase):
else:
cfg_items = self.cfg.__dict__.items()
for term_name, term_cfg in cfg_items:
# check if term config is None
if term_cfg is None:
continue
# check valid type
if not isinstance(term_cfg, ActionTermCfg):
raise TypeError(
......
......@@ -42,6 +42,7 @@ __all__ = [
"quat_from_matrix",
"quat_apply_yaw",
"quat_box_minus",
"quat_error_magnitude",
"yaw_quat",
"euler_xyz_from_quat",
"axis_angle_from_quat",
......@@ -496,9 +497,8 @@ def axis_angle_from_quat(quat: torch.Tensor, eps: float = 1.0e-6) -> torch.Tenso
eps: The tolerance for Taylor approximation. Defaults to 1.0e-6.
Returns:
Rotations given as a vector in axis angle form, as a tensor
of shape (..., 3), where the magnitude is the angle turned
anti-clockwise in radians around the vector's direction.
Rotations given as a vector in axis angle form, as a tensor of shape (..., 3),
where the magnitude is the angle turned anti-clockwise in radians around the vector's direction.
Reference:
Based on PyTorch3D (https://github.com/facebookresearch/pytorch3d/blob/main/pytorch3d/transforms/rotation_conversions.py#L526-L554)
......@@ -520,6 +520,21 @@ def axis_angle_from_quat(quat: torch.Tensor, eps: float = 1.0e-6) -> torch.Tenso
return quat[..., 1:4] / sin_half_angles_over_angles.unsqueeze(-1)
@torch.jit.script
def quat_error_magnitude(q1: torch.Tensor, q2: torch.Tensor) -> torch.Tensor:
"""Computes the rotation difference between two quaternions.
Args:
q1: A (N, 4) tensor for quaternion (w, x, y, z).
q2: A (N, 4) tensor for quaternion (w, x, y, z).
Returns:
Angular error between input quaternions in radians.
"""
quat_diff = quat_mul(q1, quat_conjugate(q2))
return torch.norm(axis_angle_from_quat(quat_diff), dim=1)
"""
Transformations
"""
......@@ -626,6 +641,9 @@ def compute_pose_error(
Returns:
A tuple containing position and orientation error.
- If :attr:`rot_error_type` is "quat", the orientation error is returned as a quaternion.
- If :attr:`rot_error_type` is "axis_angle", the orientation error is returned as an axis-angle vector.
Raises:
ValueError: Invalid rotation error type.
"""
......
......@@ -49,6 +49,6 @@ __version__ = ORBIT_TASKS_METADATA["package"]["version"]
from .utils import import_packages
# The blacklist is used to prevent importing configs from sub-packages
_BLACKLIST_PKGS = ["locomotion.velocity.config.anymal_d", "manipulation", "utils", "classic.ant", "classic.humanoid"]
_BLACKLIST_PKGS = ["locomotion.velocity.config.anymal_d", "utils", "classic.ant", "classic.humanoid"]
# Import all configs in this package
import_packages(__name__, _BLACKLIST_PKGS)
......@@ -4,3 +4,5 @@
# SPDX-License-Identifier: BSD-3-Clause
"""Manipulation environments for fixed-arm robots."""
from .reach import * # noqa
......@@ -3,24 +3,4 @@
#
# SPDX-License-Identifier: BSD-3-Clause
"""Environment for end-effector pose tracking task for fixed-arm robots."""
import gymnasium as gym
from . import agents
##
# Register Gym environments.
##
gym.register(
id="Isaac-Reach-Franka-v0",
entry_point="omni.isaac.orbit.envs:RLTaskEnv",
disable_env_checker=True,
kwargs={
"env_cfg_entry_point": f"{__name__}.reach_env_cfg:ReachEnvCfg",
"rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_ppo_cfg.yaml",
"rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:LIFT_RSL_RL_PPO_CFG",
"skrl_cfg_entry_point": f"{agents.__name__}:skrl_ppo_cfg.yaml",
},
)
"""Fixed-arm environments with end-effector pose tracking commands."""
seed: 42
# Models are instantiated using skrl's model instantiator utility
# https://skrl.readthedocs.io/en/develop/modules/skrl.utils.model_instantiators.html
models:
separate: False
policy: # see skrl.utils.model_instantiators.gaussian_model for parameter details
clip_actions: False
clip_log_std: True
min_log_std: -20.0
max_log_std: 2.0
input_shape: "Shape.STATES"
hiddens: [256, 128, 64]
hidden_activation: ["elu", "elu", "elu"]
output_shape: "Shape.ACTIONS"
output_activation: "tanh"
output_scale: 1.0
value: # see skrl.utils.model_instantiators.deterministic_model for parameter details
clip_actions: False
input_shape: "Shape.STATES"
hiddens: [256, 128, 64]
hidden_activation: ["elu", "elu", "elu"]
output_shape: "Shape.ONE"
output_activation: ""
output_scale: 1.0
# PPO agent configuration (field names are from PPO_DEFAULT_CONFIG)
# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ppo.html
agent:
rollouts: 16
learning_epochs: 8
mini_batches: 8
discount_factor: 0.99
lambda: 0.95
learning_rate: 3.e-4
learning_rate_scheduler: "KLAdaptiveRL"
learning_rate_scheduler_kwargs:
kl_threshold: 0.01
state_preprocessor: "RunningStandardScaler"
state_preprocessor_kwargs: null
value_preprocessor: "RunningStandardScaler"
value_preprocessor_kwargs: null
random_timesteps: 0
learning_starts: 0
grad_norm_clip: 1.0
ratio_clip: 0.2
value_clip: 0.2
clip_predicted_values: True
entropy_loss_scale: 0.0
value_loss_scale: 2.0
kl_threshold: 0
rewards_shaper_scale: 1.0
# logging and checkpoint
experiment:
directory: "reach"
experiment_name: ""
write_interval: 40
checkpoint_interval: 400
# Sequential trainer
# https://skrl.readthedocs.io/en/latest/modules/skrl.trainers.sequential.html
trainer:
timesteps: 16000
# Copyright (c) 2022-2023, The ORBIT Project Developers.
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
"""Configurations for arm-based reach-tracking environments."""
# We leave this file empty since we don't want to expose any configs in this package directly.
# We still need this file to import the "config" module in the parent package.
# Copyright (c) 2022-2023, The ORBIT Project Developers.
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
import gymnasium as gym
from . import agents, joint_env_cfg
##
# Register Gym environments.
##
gym.register(
id="Isaac-Reach-Franka-v0",
entry_point="omni.isaac.orbit.envs:RLTaskEnv",
disable_env_checker=True,
kwargs={
"env_cfg_entry_point": joint_env_cfg.FrankaReachEnvCfg,
"rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_ppo_cfg.yaml",
"rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:FrankaReachPPORunnerCfg",
},
)
gym.register(
id="Isaac-Reach-Franka-Play-v0",
entry_point="omni.isaac.orbit.envs:RLTaskEnv",
disable_env_checker=True,
kwargs={
"env_cfg_entry_point": joint_env_cfg.FrankaReachEnvCfg_PLAY,
"rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_ppo_cfg.yaml",
"rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:FrankaReachPPORunnerCfg",
},
)
params:
seed: 42
# environment wrapper clipping
env:
clip_observations: 100.0
clip_actions: 100.0
algo:
name: a2c_continuous
model:
name: continuous_a2c_logstd
network:
name: actor_critic
separate: False
space:
continuous:
mu_activation: None
sigma_activation: None
mu_init:
name: default
sigma_init:
name: const_initializer
val: 0
fixed_sigma: True
mlp:
units: [64, 64]
activation: elu
d2rl: False
initializer:
name: default
regularizer:
name: None
load_checkpoint: False # flag which sets whether to load the checkpoint
load_path: '' # path to the checkpoint to load
config:
name: reach_franka
env_name: rlgpu
device: 'cuda:0'
device_name: 'cuda:0'
multi_gpu: False
ppo: True
mixed_precision: False
normalize_input: True
normalize_value: True
value_bootstrap: True
num_actors: -1
reward_shaper:
scale_value: 1.0
normalize_advantage: True
gamma: 0.99
tau: 0.95
learning_rate: 1e-3
lr_schedule: adaptive
schedule_type: legacy
kl_threshold: 0.01
score_to_win: 10000
max_epochs: 1000
save_best_after: 200
save_frequency: 100
print_stats: True
grad_norm: 1.0
entropy_coef: 0.01
truncate_grads: True
e_clip: 0.2
horizon_length: 24
minibatch_size: 24576
mini_epochs: 5
critic_coef: 2
clip_value: True
clip_actions: False
bounds_loss_coef: 0.0001
# Copyright (c) 2022-2023, The ORBIT Project Developers.
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
from omni.isaac.orbit.utils import configclass
from omni.isaac.orbit_tasks.utils.wrappers.rsl_rl import (
RslRlOnPolicyRunnerCfg,
RslRlPpoActorCriticCfg,
RslRlPpoAlgorithmCfg,
)
@configclass
class FrankaReachPPORunnerCfg(RslRlOnPolicyRunnerCfg):
num_steps_per_env = 24
max_iterations = 1000
save_interval = 50
experiment_name = "reach_franka"
run_name = ""
resume = False
empirical_normalization = False
policy = RslRlPpoActorCriticCfg(
init_noise_std=1.0,
actor_hidden_dims=[64, 64],
critic_hidden_dims=[64, 64],
activation="elu",
)
algorithm = RslRlPpoAlgorithmCfg(
value_loss_coef=1.0,
use_clipped_value_loss=True,
clip_param=0.2,
entropy_coef=0.01,
num_learning_epochs=8,
num_mini_batches=4,
learning_rate=1.0e-3,
schedule="adaptive",
gamma=0.99,
lam=0.95,
desired_kl=0.01,
max_grad_norm=1.0,
)
# Copyright (c) 2022-2023, The ORBIT Project Developers.
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
from __future__ import annotations
import math
from omni.isaac.orbit.utils import configclass
import omni.isaac.orbit_tasks.manipulation.reach.mdp as mdp
from omni.isaac.orbit_tasks.manipulation.reach.reach_env_cfg import ReachEnvCfg
##
# Pre-defined configs
##
# isort: off
from omni.isaac.orbit.assets.config import FRANKA_PANDA_ARM_WITH_PANDA_HAND_CFG
##
# Environment configuration
##
@configclass
class FrankaReachEnvCfg(ReachEnvCfg):
def __post_init__(self):
# post init of parent
super().__post_init__()
# switch robot to franka
self.scene.robot = FRANKA_PANDA_ARM_WITH_PANDA_HAND_CFG.replace(prim_path="{ENV_REGEX_NS}/Robot")
# override rewards
self.rewards.end_effector_position_tracking.params["asset_cfg"].body_names = ["panda_hand"]
self.rewards.end_effector_orientation_tracking.params["asset_cfg"].body_names = ["panda_hand"]
# override actions
self.actions.arm_action = mdp.JointPositionActionCfg(
asset_name="robot", joint_names=["panda_joint.*"], scale=0.5, use_default_offset=True
)
# override command generator body
# end-effector is along z-direction
self.commands.body_name = "panda_hand"
self.commands.ranges.pitch = (math.pi, math.pi)
@configclass
class FrankaReachEnvCfg_PLAY(FrankaReachEnvCfg):
def __post_init__(self):
# post init of parent
super().__post_init__()
# make a smaller scene for play
self.scene.num_envs = 50
self.scene.env_spacing = 2.5
# disable randomization for play
self.observations.policy.enable_corruption = False
# Copyright (c) 2022-2023, The ORBIT Project Developers.
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
import gymnasium as gym
from . import agents, joint_env_cfg
##
# Register Gym environments.
##
gym.register(
id="Isaac-Reach-UR10-v0",
entry_point="omni.isaac.orbit.envs:RLTaskEnv",
disable_env_checker=True,
kwargs={
"env_cfg_entry_point": joint_env_cfg.UR10ReachEnvCfg,
"rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_ppo_cfg.yaml",
"rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:UR10ReachPPORunnerCfg",
},
)
gym.register(
id="Isaac-Reach-UR10-Play-v0",
entry_point="omni.isaac.orbit.envs:RLTaskEnv",
disable_env_checker=True,
kwargs={
"env_cfg_entry_point": joint_env_cfg.UR10ReachEnvCfg_PLAY,
"rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_ppo_cfg.yaml",
"rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:UR10ReachPPORunnerCfg",
},
)
......@@ -3,8 +3,8 @@ params:
# environment wrapper clipping
env:
clip_observations: 5.0
clip_actions: 1.0
clip_observations: 100.0
clip_actions: 100.0
algo:
name: a2c_continuous
......@@ -27,7 +27,7 @@ params:
val: 0
fixed_sigma: True
mlp:
units: [256, 128, 64]
units: [64, 64]
activation: elu
d2rl: False
......@@ -40,7 +40,7 @@ params:
load_path: '' # path to the checkpoint to load
config:
name: lift
name: reach_ur10
env_name: rlgpu
device: 'cuda:0'
device_name: 'cuda:0'
......@@ -56,23 +56,23 @@ params:
normalize_advantage: True
gamma: 0.99
tau: 0.95
learning_rate: 5e-4
learning_rate: 1e-3
lr_schedule: adaptive
schedule_type: legacy
kl_threshold: 0.008
kl_threshold: 0.01
score_to_win: 10000
max_epochs: 5000
max_epochs: 1000
save_best_after: 200
save_frequency: 100
print_stats: True
grad_norm: 1.0
entropy_coef: 0.0
entropy_coef: 0.01
truncate_grads: True
e_clip: 0.2
horizon_length: 32
minibatch_size: 2048
horizon_length: 24
minibatch_size: 24576
mini_epochs: 5
critic_coef: 4
critic_coef: 2
clip_value: True
seq_len: 4
clip_actions: False
bounds_loss_coef: 0.0001
......@@ -3,40 +3,41 @@
#
# SPDX-License-Identifier: BSD-3-Clause
from omni.isaac.orbit.utils import configclass
from omni.isaac.orbit_tasks.utils.wrappers.rsl_rl import (
RslRlOnPolicyRunnerCfg,
RslRlPpoActorCriticCfg,
RslRlPpoAlgorithmCfg,
)
REACH_RSL_RL_PPO_CFG = RslRlOnPolicyRunnerCfg(
num_steps_per_env=64,
max_iterations=250,
save_interval=50,
experiment_name="reach",
run_name="",
resume=False,
load_run=-1,
load_checkpoint=-1,
empirical_normalization=False,
policy=RslRlPpoActorCriticCfg(
@configclass
class UR10ReachPPORunnerCfg(RslRlOnPolicyRunnerCfg):
num_steps_per_env = 24
max_iterations = 1000
save_interval = 50
experiment_name = "reach_ur10"
run_name = ""
resume = False
empirical_normalization = False
policy = RslRlPpoActorCriticCfg(
init_noise_std=1.0,
actor_hidden_dims=[256, 128, 64],
critic_hidden_dims=[256, 128, 64],
actor_hidden_dims=[64, 64],
critic_hidden_dims=[64, 64],
activation="elu",
),
algorithm=RslRlPpoAlgorithmCfg(
)
algorithm = RslRlPpoAlgorithmCfg(
value_loss_coef=1.0,
use_clipped_value_loss=True,
clip_param=0.2,
entropy_coef=0.01,
num_learning_epochs=8,
num_mini_batches=64,
learning_rate=3.0e-4,
num_mini_batches=4,
learning_rate=1.0e-3,
schedule="adaptive",
gamma=0.99,
lam=0.95,
desired_kl=0.01,
max_grad_norm=1.0,
),
)
)
# Copyright (c) 2022-2023, The ORBIT Project Developers.
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
from __future__ import annotations
import math
from omni.isaac.orbit.utils import configclass
import omni.isaac.orbit_tasks.manipulation.reach.mdp as mdp
from omni.isaac.orbit_tasks.manipulation.reach.reach_env_cfg import ReachEnvCfg
##
# Pre-defined configs
##
# isort: off
from omni.isaac.orbit.assets.config import UR10_CFG
##
# Environment configuration
##
@configclass
class UR10ReachEnvCfg(ReachEnvCfg):
def __post_init__(self):
# post init of parent
super().__post_init__()
# switch robot to ur10
self.scene.robot = UR10_CFG.replace(prim_path="{ENV_REGEX_NS}/Robot")
# override randomization
self.randomization.reset_robot_joints.params["position_range"] = (0.75, 1.25)
# override rewards
self.rewards.end_effector_position_tracking.params["asset_cfg"].body_names = ["ee_link"]
self.rewards.end_effector_orientation_tracking.params["asset_cfg"].body_names = ["ee_link"]
# override actions
self.actions.arm_action = mdp.JointPositionActionCfg(
asset_name="robot", joint_names=[".*"], scale=0.5, use_default_offset=True
)
# override command generator body
# end-effector is along x-direction
self.commands.body_name = "ee_link"
self.commands.ranges.pitch = (math.pi / 2, math.pi / 2)
@configclass
class UR10ReachEnvCfg_PLAY(UR10ReachEnvCfg):
def __post_init__(self):
# post init of parent
super().__post_init__()
# make a smaller scene for play
self.scene.num_envs = 50
self.scene.env_spacing = 2.5
# disable randomization for play
self.observations.policy.enable_corruption = False
# Copyright (c) 2022-2023, The ORBIT Project Developers.
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
"""This sub-module contains the functions that are specific to the locomotion environments."""
from omni.isaac.orbit.envs.mdp import * # noqa: F401, F403
from .rewards import * # noqa: F401, F403
# Copyright (c) 2022-2023, The ORBIT Project Developers.
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
from __future__ import annotations
import torch
from typing import TYPE_CHECKING
from omni.isaac.orbit.assets import RigidObject
from omni.isaac.orbit.command_generators import UniformPoseCommandGenerator
from omni.isaac.orbit.managers import SceneEntityCfg
from omni.isaac.orbit.utils.math import combine_frame_transforms, quat_error_magnitude, quat_mul
if TYPE_CHECKING:
from omni.isaac.orbit.envs import RLTaskEnv
def position_command_error(env: RLTaskEnv, asset_cfg: SceneEntityCfg) -> torch.Tensor:
"""Penalize tracking of the position error using L2-norm.
The function computes the position error between the desired position (from the command) and the
current position of the asset's body (in world frame). The position error is computed as the L2-norm
of the difference between the desired and current positions.
"""
# extract the asset (to enable type hinting)
asset: RigidObject = env.scene[asset_cfg.name]
command_manager: UniformPoseCommandGenerator = env.command_manager
# obtain the desired and current positions
des_pos_b = command_manager.command[:, :3]
des_pos_w, _ = combine_frame_transforms(asset.data.root_state_w[:, :3], asset.data.root_state_w[:, 3:7], des_pos_b)
curr_pos_w = asset.data.body_state_w[:, asset_cfg.body_ids[0], :3] # type: ignore
return torch.norm(curr_pos_w - des_pos_w, dim=1)
def orientation_command_error(env: RLTaskEnv, asset_cfg: SceneEntityCfg) -> torch.Tensor:
"""Penalize tracking orientation error using shortest path.
The function computes the orientation error between the desired orientation (from the command) and the
current orientation of the asset's body (in world frame). The orientation error is computed as the shortest
path between the desired and current orientations.
"""
# extract the asset (to enable type hinting)
asset: RigidObject = env.scene[asset_cfg.name]
command_manager: UniformPoseCommandGenerator = env.command_manager
# obtain the desired and current orientations
des_quat_b = command_manager.command[:, 3:7]
des_quat_w = quat_mul(asset.data.root_state_w[:, 3:7], des_quat_b)
curr_quat_w = asset.data.body_state_w[:, asset_cfg.body_ids[0], 3:7] # type: ignore
return quat_error_magnitude(curr_quat_w, des_quat_w)
# Copyright (c) 2022-2023, The ORBIT Project Developers.
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
from __future__ import annotations
from omni.isaac.orbit.controllers.differential_inverse_kinematics import DifferentialInverseKinematicsCfg
from omni.isaac.orbit.robots.config.franka import FRANKA_PANDA_ARM_WITH_PANDA_HAND_CFG
from omni.isaac.orbit.robots.single_arm import SingleArmManipulatorCfg
from omni.isaac.orbit.utils import configclass
from omni.isaac.orbit.utils.assets import ISAAC_NUCLEUS_DIR
from omni.isaac.orbit_tasks.isaac_env_cfg import EnvCfg, IsaacEnvCfg, SimCfg, ViewerCfg
##
# Scene settings
##
@configclass
class TableCfg:
"""Properties for the table."""
# note: we use instanceable asset since it consumes less memory
usd_path = f"{ISAAC_NUCLEUS_DIR}/Props/Mounts/SeattleLabTable/table_instanceable.usd"
@configclass
class MarkerCfg:
"""Properties for visualization marker."""
# usd file to import
usd_path = f"{ISAAC_NUCLEUS_DIR}/Props/UIElements/frame_prim.usd"
# scale of the asset at import
scale = [0.1, 0.1, 0.1] # x,y,z
##
# MDP settings
##
@configclass
class RandomizationCfg:
"""Randomization of scene at reset."""
@configclass
class EndEffectorDesiredPoseCfg:
"""Randomization of end-effector pose command."""
# category
position_cat: str = "default" # randomize position: "default", "uniform"
orientation_cat: str = "default" # randomize position: "default", "uniform"
# randomize position
position_default = [0.5, 0.0, 0.5] # position default (x,y,z)
position_uniform_min = [0.25, -0.25, 0.25] # position (x,y,z)
position_uniform_max = [0.5, 0.25, 0.5] # position (x,y,z)
# randomize orientation
orientation_default = [1.0, 0.0, 0.0, 0.0] # orientation default
# initialize
ee_desired_pose: EndEffectorDesiredPoseCfg = EndEffectorDesiredPoseCfg()
@configclass
class ObservationsCfg:
"""Observation specifications for the MDP."""
@configclass
class PolicyCfg:
"""Observations for policy group."""
# global group settings
enable_corruption: bool = True
# observation terms
arm_dof_pos_normalized = {"scale": 1.0, "noise": {"name": "uniform", "min": -0.01, "max": 0.01}}
arm_dof_vel = {"scale": 0.5, "noise": {"name": "uniform", "min": -0.1, "max": 0.1}}
ee_position = {}
ee_position_command = {}
actions = {}
# global observation settings
return_dict_obs_in_group = False
"""Whether to return observations as dictionary or flattened vector within groups."""
# observation groups
policy: PolicyCfg = PolicyCfg()
@configclass
class RewardsCfg:
"""Reward terms for the MDP."""
tracking_robot_position_l2 = {"weight": 0.0}
tracking_robot_position_exp = {"weight": 2.5, "sigma": 0.05} # 0.25
penalizing_robot_dof_velocity_l2 = {"weight": -0.02} # -1e-4
penalizing_robot_dof_acceleration_l2 = {"weight": -1e-5}
penalizing_action_rate_l2 = {"weight": -0.1}
@configclass
class TerminationsCfg:
"""Termination terms for the MDP."""
episode_timeout = True # reset when episode length ended
@configclass
class ControlCfg:
"""Processing of MDP actions."""
# action space
control_type = "default" # "default", "inverse_kinematics"
# decimation: Number of control action updates @ sim dt per policy dt
decimation = 2
# configuration loaded when control_type == "inverse_kinematics"
inverse_kinematics: DifferentialInverseKinematicsCfg = DifferentialInverseKinematicsCfg(
command_type="pose_rel",
ik_method="dls",
position_command_scale=(0.1, 0.1, 0.1),
rotation_command_scale=(0.1, 0.1, 0.1),
)
##
# Environment configuration
##
@configclass
class ReachEnvCfg(IsaacEnvCfg):
"""Configuration for the reach environment."""
# General Settings
env: EnvCfg = EnvCfg(num_envs=2048, env_spacing=2.5, episode_length_s=4.0)
viewer: ViewerCfg = ViewerCfg(debug_vis=True, eye=(7.5, 7.5, 7.5), lookat=(0.0, 0.0, 0.0))
# Physics settings
sim: SimCfg = SimCfg(dt=1.0 / 60.0, substeps=1)
# Scene Settings
robot: SingleArmManipulatorCfg = FRANKA_PANDA_ARM_WITH_PANDA_HAND_CFG
table: TableCfg = TableCfg()
marker: MarkerCfg = MarkerCfg()
# MDP settings
randomization: RandomizationCfg = RandomizationCfg()
observations: ObservationsCfg = ObservationsCfg()
rewards: RewardsCfg = RewardsCfg()
terminations: TerminationsCfg = TerminationsCfg()
# Controller settings
control: ControlCfg = ControlCfg()
# Copyright (c) 2022-2023, The ORBIT Project Developers.
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
from __future__ import annotations
import gymnasium as gym
import math
import torch
import omni.isaac.core.utils.prims as prim_utils
import omni.isaac.orbit.compat.utils.kit as kit_utils
from omni.isaac.orbit.compat.markers import PointMarker, StaticMarker
from omni.isaac.orbit.compat.utils.mdp import ObservationManager, RewardManager
from omni.isaac.orbit.controllers.differential_inverse_kinematics import DifferentialInverseKinematics
from omni.isaac.orbit.robots.single_arm import SingleArmManipulator
from omni.isaac.orbit.utils.dict import class_to_dict
from omni.isaac.orbit.utils.math import random_orientation, sample_uniform, scale_transform
from omni.isaac.orbit_tasks.isaac_env import IsaacEnv, VecEnvIndices, VecEnvObs
from .reach_cfg import RandomizationCfg, ReachEnvCfg
class ReachEnv(IsaacEnv):
"""Environment for reaching to desired pose for a single-arm manipulator."""
def __init__(self, cfg: ReachEnvCfg = None, **kwargs):
# copy configuration
self.cfg = cfg
# parse the configuration for controller configuration
# note: controller decides the robot control mode
self._pre_process_cfg()
# create classes (these are called by the function :meth:`_design_scene`
self.robot = SingleArmManipulator(cfg=self.cfg.robot)
# initialize the base class to setup the scene.
super().__init__(self.cfg, **kwargs)
# parse the configuration for information
self._process_cfg()
# initialize views for the cloned scenes
self._initialize_views()
# prepare the observation manager
self._observation_manager = ReachObservationManager(class_to_dict(self.cfg.observations), self, self.device)
# prepare the reward manager
self._reward_manager = ReachRewardManager(
class_to_dict(self.cfg.rewards), self, self.num_envs, self.dt, self.device
)
# print information about MDP
print("[INFO] Observation Manager:", self._observation_manager)
print("[INFO] Reward Manager: ", self._reward_manager)
# compute the observation space
num_obs = self._observation_manager._group_obs_dim["policy"][0]
self.observation_space = gym.spaces.Box(low=-math.inf, high=math.inf, shape=(num_obs,))
# compute the action space
self.action_space = gym.spaces.Box(low=-1.0, high=1.0, shape=(self.num_actions,))
print("[INFO]: Completed setting up the environment...")
# Take an initial step to initialize the scene.
self.sim.step()
# -- fill up buffers
self.robot.update_buffers(self.dt)
"""
Implementation specifics.
"""
def _design_scene(self):
# ground plane
kit_utils.create_ground_plane("/World/defaultGroundPlane", z_position=-1.05)
# table
prim_utils.create_prim(self.template_env_ns + "/Table", usd_path=self.cfg.table.usd_path)
# robot
self.robot.spawn(self.template_env_ns + "/Robot")
# setup debug visualization
if self.cfg.viewer.debug_vis and self.enable_render:
# create point instancer to visualize the goal points
self._goal_markers = PointMarker("/Visuals/ee_goal", self.num_envs, radius=0.025)
# create marker for viewing end-effector pose
self._ee_markers = StaticMarker(
"/Visuals/ee_current", self.num_envs, usd_path=self.cfg.marker.usd_path, scale=self.cfg.marker.scale
)
# create marker for viewing command (if task-space controller is used)
if self.cfg.control.control_type == "inverse_kinematics":
self._cmd_markers = StaticMarker(
"/Visuals/ik_command", self.num_envs, usd_path=self.cfg.marker.usd_path, scale=self.cfg.marker.scale
)
# return list of global prims
return ["/World/defaultGroundPlane"]
def _reset_idx(self, env_ids: VecEnvIndices):
# randomize the MDP
# -- robot DOF state
dof_pos, dof_vel = self.robot.get_default_dof_state(env_ids=env_ids)
self.robot.set_dof_state(dof_pos, dof_vel, env_ids=env_ids)
# -- desired end-effector pose
self._randomize_ee_desired_pose(env_ids, cfg=self.cfg.randomization.ee_desired_pose)
# -- Reward logging
# fill extras with episode information
self.extras["episode"] = dict()
# reset
# -- rewards manager: fills the sums for terminated episodes
self._reward_manager.reset_idx(env_ids, self.extras["episode"])
# -- obs manager
self._observation_manager.reset_idx(env_ids)
# -- reset history
self.previous_actions[env_ids] = 0
# -- MDP reset
self.reset_buf[env_ids] = 0
self.episode_length_buf[env_ids] = 0
# controller reset
if self.cfg.control.control_type == "inverse_kinematics":
self._ik_controller.reset_idx(env_ids)
def _step_impl(self, actions: torch.Tensor):
# reset environments that terminated
reset_env_ids = self.reset_buf.nonzero(as_tuple=False).squeeze(-1)
if len(reset_env_ids) > 0:
self._reset_idx(reset_env_ids)
# increment the number of steps
self.episode_length_buf += 1
# pre-step: set actions into buffer
self.actions = actions.clone().to(device=self.device)
# transform actions based on controller
if self.cfg.control.control_type == "inverse_kinematics":
# set the controller commands
self._ik_controller.set_command(self.actions)
# compute the joint commands
self.robot_actions[:, : self.robot.arm_num_dof] = self._ik_controller.compute(
self.robot.data.ee_state_w[:, 0:3] - self.envs_positions,
self.robot.data.ee_state_w[:, 3:7],
self.robot.data.ee_jacobian,
self.robot.data.arm_dof_pos,
)
# offset actuator command with position offsets
self.robot_actions[:, : self.robot.arm_num_dof] -= self.robot.data.actuator_pos_offset[
:, : self.robot.arm_num_dof
]
elif self.cfg.control.control_type == "default":
self.robot_actions[:, : self.robot.arm_num_dof] = self.actions
# perform physics stepping
for _ in range(self.cfg.control.decimation):
# set actions into buffers
self.robot.apply_action(self.robot_actions)
# simulate
self.sim.step(render=self.enable_render)
# check that simulation is playing
if self.sim.is_stopped():
return
# post-step:
# -- compute common buffers
self.robot.update_buffers(self.dt)
# -- compute MDP signals
# reward
self.reward_buf = self._reward_manager.compute()
# terminations
self._check_termination()
# -- store history
self.previous_actions = self.actions.clone()
# -- add information to extra if timeout occurred due to episode length
# Note: this is used by algorithms like PPO where time-outs are handled differently
self.extras["time_outs"] = self.episode_length_buf >= self.max_episode_length
# -- update USD visualization
if self.cfg.viewer.debug_vis and self.enable_render:
self._debug_vis()
def _get_observations(self) -> VecEnvObs:
# compute observations
return self._observation_manager.compute()
"""
Helper functions - Scene handling.
"""
def _pre_process_cfg(self) -> None:
"""Pre processing of configuration parameters."""
# set configuration for task-space controller
if self.cfg.control.control_type == "inverse_kinematics":
print("Using inverse kinematics controller...")
# enable jacobian computation
self.cfg.robot.data_info.enable_jacobian = True
# enable gravity compensation
self.cfg.robot.rigid_props.disable_gravity = True
# set the end-effector offsets
self.cfg.control.inverse_kinematics.position_offset = self.cfg.robot.ee_info.pos_offset
self.cfg.control.inverse_kinematics.rotation_offset = self.cfg.robot.ee_info.rot_offset
else:
print("Using default joint controller...")
def _process_cfg(self) -> None:
"""Post processing of configuration parameters."""
# compute constants for environment
self.dt = self.cfg.control.decimation * self.physics_dt # control-dt
self.max_episode_length = math.ceil(self.cfg.env.episode_length_s / self.dt)
# convert configuration parameters to torch
# randomization
# -- desired pose
config = self.cfg.randomization.ee_desired_pose
for attr in ["position_uniform_min", "position_uniform_max", "position_default", "orientation_default"]:
setattr(config, attr, torch.tensor(getattr(config, attr), device=self.device, requires_grad=False))
def _initialize_views(self) -> None:
"""Creates views and extract useful quantities from them."""
# play the simulator to activate physics handles
# note: this activates the physics simulation view that exposes TensorAPIs
self.sim.reset()
# define views over instances
self.robot.initialize(self.env_ns + "/.*/Robot")
# create controller
if self.cfg.control.control_type == "inverse_kinematics":
self._ik_controller = DifferentialInverseKinematics(
self.cfg.control.inverse_kinematics, self.robot.count, self.device
)
# note: we exclude gripper from actions in this env
self.num_actions = self._ik_controller.num_actions
elif self.cfg.control.control_type == "default":
# note: we exclude gripper from actions in this env
self.num_actions = self.robot.arm_num_dof
# history
self.actions = torch.zeros((self.num_envs, self.num_actions), device=self.device)
self.previous_actions = torch.zeros((self.num_envs, self.num_actions), device=self.device)
# robot joint actions
self.robot_actions = torch.zeros((self.num_envs, self.robot.num_actions), device=self.device)
# commands
self.ee_des_pose_w = torch.zeros((self.num_envs, 7), device=self.device)
def _debug_vis(self):
# compute error between end-effector and command
error = torch.sum(torch.square(self.ee_des_pose_w[:, :3] - self.robot.data.ee_state_w[:, 0:3]), dim=1)
# set indices of the prim based on error threshold
goal_indices = torch.where(error < 0.002, 1, 0)
# apply to instance manager
# -- goal
self._goal_markers.set_world_poses(self.ee_des_pose_w[:, :3], self.ee_des_pose_w[:, 3:7])
self._goal_markers.set_status(goal_indices)
# -- end-effector
self._ee_markers.set_world_poses(self.robot.data.ee_state_w[:, 0:3], self.robot.data.ee_state_w[:, 3:7])
# -- task-space commands
if self.cfg.control.control_type == "inverse_kinematics":
# convert to world frame
ee_positions = self._ik_controller.desired_ee_pos + self.envs_positions
ee_orientations = self._ik_controller.desired_ee_rot
# set poses
self._cmd_markers.set_world_poses(ee_positions, ee_orientations)
"""
Helper functions - MDP.
"""
def _check_termination(self) -> None:
# extract values from buffer
# compute resets
self.reset_buf[:] = 0
# -- episode length
if self.cfg.terminations.episode_timeout:
self.reset_buf = torch.where(self.episode_length_buf >= self.max_episode_length, 1, self.reset_buf)
def _randomize_ee_desired_pose(self, env_ids: torch.Tensor, cfg: RandomizationCfg.EndEffectorDesiredPoseCfg):
"""Randomize the desired pose of the end-effector."""
# -- desired object root position
if cfg.position_cat == "default":
# constant command for position
self.ee_des_pose_w[env_ids, 0:3] = cfg.position_default
elif cfg.position_cat == "uniform":
# sample uniformly from box
# note: this should be within in the workspace of the robot
self.ee_des_pose_w[env_ids, 0:3] = sample_uniform(
cfg.position_uniform_min, cfg.position_uniform_max, (len(env_ids), 3), device=self.device
)
else:
raise ValueError(f"Invalid category for randomizing the desired object positions '{cfg.position_cat}'.")
# -- desired object root orientation
if cfg.orientation_cat == "default":
# constant position of the object
self.ee_des_pose_w[env_ids, 3:7] = cfg.orientation_default
elif cfg.orientation_cat == "uniform":
self.ee_des_pose_w[env_ids, 3:7] = random_orientation(len(env_ids), self.device)
else:
raise ValueError(
f"Invalid category for randomizing the desired object orientation '{cfg.orientation_cat}'."
)
# transform command from local env to world
self.ee_des_pose_w[env_ids, 0:3] += self.envs_positions[env_ids]
class ReachObservationManager(ObservationManager):
"""Reward manager for single-arm reaching environment."""
def arm_dof_pos_normalized(self, env: ReachEnv):
"""DOF positions for the arm normalized to its max and min ranges."""
return scale_transform(
env.robot.data.arm_dof_pos,
env.robot.data.soft_dof_pos_limits[:, :7, 0],
env.robot.data.soft_dof_pos_limits[:, :7, 1],
)
def arm_dof_vel(self, env: ReachEnv):
"""DOF velocity of the arm."""
return env.robot.data.arm_dof_vel
def ee_position(self, env: ReachEnv):
"""Current end-effector position of the arm."""
return env.robot.data.ee_state_w[:, :3] - env.envs_positions
def ee_position_command(self, env: ReachEnv):
"""Desired end-effector position of the arm."""
return env.ee_des_pose_w[:, :3] - env.envs_positions
def actions(self, env: ReachEnv):
"""Last actions provided to env."""
return env.actions
class ReachRewardManager(RewardManager):
"""Reward manager for single-arm reaching environment."""
def tracking_robot_position_l2(self, env: ReachEnv):
"""Penalize tracking position error using L2-kernel."""
# compute error
return torch.sum(torch.square(env.ee_des_pose_w[:, :3] - env.robot.data.ee_state_w[:, 0:3]), dim=1)
def tracking_robot_position_exp(self, env: ReachEnv, sigma: float):
"""Penalize tracking position error using exp-kernel."""
# compute error
error = torch.sum(torch.square(env.ee_des_pose_w[:, :3] - env.robot.data.ee_state_w[:, 0:3]), dim=1)
return torch.exp(-error / sigma)
def penalizing_robot_dof_velocity_l2(self, env: ReachEnv):
"""Penalize large movements of the robot arm."""
return torch.sum(torch.square(env.robot.data.arm_dof_vel), dim=1)
def penalizing_robot_dof_acceleration_l2(self, env: ReachEnv):
"""Penalize fast movements of the robot arm."""
return torch.sum(torch.square(env.robot.data.dof_acc), dim=1)
def penalizing_action_rate_l2(self, env: ReachEnv):
"""Penalize large variations in action commands."""
return torch.sum(torch.square(env.actions - env.previous_actions), dim=1)
# Copyright (c) 2022-2023, The ORBIT Project Developers.
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
from __future__ import annotations
from dataclasses import MISSING
import omni.isaac.orbit.sim as sim_utils
from omni.isaac.orbit.assets import ArticulationCfg, AssetBaseCfg
from omni.isaac.orbit.command_generators import UniformPoseCommandGeneratorCfg
from omni.isaac.orbit.envs import RLTaskEnvCfg
from omni.isaac.orbit.managers import ActionTermCfg as ActionTerm
from omni.isaac.orbit.managers import CurriculumTermCfg as CurrTerm
from omni.isaac.orbit.managers import ObservationGroupCfg as ObsGroup
from omni.isaac.orbit.managers import ObservationTermCfg as ObsTerm
from omni.isaac.orbit.managers import RandomizationTermCfg as RandTerm
from omni.isaac.orbit.managers import RewardTermCfg as RewTerm
from omni.isaac.orbit.managers import SceneEntityCfg
from omni.isaac.orbit.managers import TerminationTermCfg as DoneTerm
from omni.isaac.orbit.scene import InteractiveSceneCfg
from omni.isaac.orbit.utils import configclass
from omni.isaac.orbit.utils.assets import ISAAC_NUCLEUS_DIR
from omni.isaac.orbit.utils.noise import AdditiveUniformNoiseCfg as Unoise
import omni.isaac.orbit_tasks.manipulation.reach.mdp as mdp
##
# Scene definition
##
@configclass
class ReachSceneCfg(InteractiveSceneCfg):
"""Configuration for the scene with a robotic arm."""
# world
ground = AssetBaseCfg(
prim_path="/World/ground",
spawn=sim_utils.GroundPlaneCfg(),
init_state=AssetBaseCfg.InitialStateCfg(pos=(0.0, 0.0, -1.05)),
)
table = AssetBaseCfg(
prim_path="{ENV_REGEX_NS}/Table",
spawn=sim_utils.UsdFileCfg(
usd_path=f"{ISAAC_NUCLEUS_DIR}/Props/Mounts/SeattleLabTable/table_instanceable.usd",
),
init_state=AssetBaseCfg.InitialStateCfg(pos=(0.55, 0.0, 0.0), rot=(0.70711, 0.0, 0.0, 0.70711)),
)
# robots
robot: ArticulationCfg = MISSING
# lights
light = AssetBaseCfg(
prim_path="/World/light",
spawn=sim_utils.DomeLightCfg(color=(0.75, 0.75, 0.75), intensity=2500.0),
)
##
# MDP settings
##
@configclass
class ActionsCfg:
"""Action specifications for the MDP."""
arm_action: ActionTerm = MISSING
gripper_action: ActionTerm | None = None
@configclass
class ObservationsCfg:
"""Observation specifications for the MDP."""
@configclass
class PolicyCfg(ObsGroup):
"""Observations for policy group."""
# observation terms (order preserved)
joint_pos = ObsTerm(func=mdp.joint_pos_rel, noise=Unoise(n_min=-0.01, n_max=0.01))
joint_vel = ObsTerm(func=mdp.joint_vel_rel, noise=Unoise(n_min=-0.01, n_max=0.01))
pose_command = ObsTerm(func=mdp.generated_commands)
actions = ObsTerm(func=mdp.last_action)
def __post_init__(self):
self.enable_corruption = True
self.concatenate_terms = True
# observation groups
policy: PolicyCfg = PolicyCfg()
@configclass
class RandomizationCfg:
"""Configuration for randomization."""
reset_robot_joints = RandTerm(
func=mdp.reset_joints_by_scale,
mode="reset",
params={
"position_range": (0.5, 1.5),
"velocity_range": (0.0, 0.0),
},
)
@configclass
class RewardsCfg:
"""Reward terms for the MDP."""
# task terms
end_effector_position_tracking = RewTerm(
func=mdp.position_command_error,
weight=-0.2,
params={"asset_cfg": SceneEntityCfg("robot", body_names=MISSING)},
)
end_effector_orientation_tracking = RewTerm(
func=mdp.orientation_command_error,
weight=-0.05,
params={"asset_cfg": SceneEntityCfg("robot", body_names=MISSING)},
)
# action penalty
action_rate = RewTerm(func=mdp.action_rate_l2, weight=-0.005)
joint_vel = RewTerm(
func=mdp.joint_vel_l2,
weight=-0.0001,
params={"asset_cfg": SceneEntityCfg("robot")},
)
@configclass
class TerminationsCfg:
"""Termination terms for the MDP."""
time_out = DoneTerm(func=mdp.time_out, time_out=True)
@configclass
class CurriculumCfg:
"""Curriculum terms for the MDP."""
action_rate = CurrTerm(
func=mdp.modify_reward_weight, params={"term_name": "action_rate", "weight": -0.01, "num_steps": 4500}
)
joint_vel = CurrTerm(
func=mdp.modify_reward_weight, params={"term_name": "joint_vel", "weight": -0.0001, "num_steps": 4500}
)
##
# Environment configuration
##
@configclass
class ReachEnvCfg(RLTaskEnvCfg):
"""Configuration for the reach end-effector pose tracking environment."""
# Scene settings
scene: ReachSceneCfg = ReachSceneCfg(num_envs=4096, env_spacing=2.5, replicate_physics=False)
# Basic settings
observations: ObservationsCfg = ObservationsCfg()
actions: ActionsCfg = ActionsCfg()
commands: UniformPoseCommandGeneratorCfg = UniformPoseCommandGeneratorCfg(
asset_name="robot",
body_name=MISSING,
resampling_time_range=(4.0, 4.0),
debug_vis=False,
ranges=UniformPoseCommandGeneratorCfg.Ranges(
pos_x=(0.35, 0.65),
pos_y=(-0.2, 0.2),
pos_z=(0.15, 0.5),
roll=(0.0, 0.0),
pitch=MISSING, # depends on end-effector axis
yaw=(-3.14, 3.14),
),
)
# MDP settings
rewards: RewardsCfg = RewardsCfg()
terminations: TerminationsCfg = TerminationsCfg()
randomization: RandomizationCfg = RandomizationCfg()
curriculum: CurriculumCfg = CurriculumCfg()
def __post_init__(self):
"""Post initialization."""
# general settings
self.decimation = 2
self.episode_length_s = 12.0
self.viewer.eye = (3.5, 3.5, 3.5)
# simulation settings
self.sim.dt = 1.0 / 60.0
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment