Unverified Commit 1b293e15 authored by David Hoeller's avatar David Hoeller Committed by GitHub

Fixes lift environment for Franka robot (#264)

# Description

Adds the Franka lift RL environment. It also checks that the reach
environment is working.

## Type of change

- New feature (non-breaking change which adds functionality)

## Checklist

- [x] I have run the [`pre-commit` checks](https://pre-commit.com/) with
`./orbit.sh --format`
- [ ] I have made corresponding changes to the documentation
- [x] My changes generate no new warnings
- [ ] I have added tests that prove my fix is effective or that my
feature works
- [ ] I have updated the changelog and the corresponding version in the
extension's `config/extension.toml` file
- [x] I have added my name to the `CONTRIBUTORS.md` or my name already
exists there
parent 0691eef6
...@@ -52,22 +52,22 @@ FRANKA_PANDA_ARM_WITH_PANDA_HAND_CFG = ArticulationCfg( ...@@ -52,22 +52,22 @@ FRANKA_PANDA_ARM_WITH_PANDA_HAND_CFG = ArticulationCfg(
joint_names_expr=["panda_joint[1-4]"], joint_names_expr=["panda_joint[1-4]"],
effort_limit=87.0, effort_limit=87.0,
velocity_limit=100.0, velocity_limit=100.0,
stiffness=800.0, stiffness=80.0,
damping=40.0, damping=4.0,
), ),
"panda_forearm": ImplicitActuatorCfg( "panda_forearm": ImplicitActuatorCfg(
joint_names_expr=["panda_joint[5-7]"], joint_names_expr=["panda_joint[5-7]"],
effort_limit=12.0, effort_limit=87.0,
velocity_limit=100.0, velocity_limit=100.0,
stiffness=800.0, stiffness=80.0,
damping=40.0, damping=4.0,
), ),
"panda_hand": ImplicitActuatorCfg( "panda_hand": ImplicitActuatorCfg(
joint_names_expr=["panda_finger_joint.*"], joint_names_expr=["panda_finger_joint.*"],
effort_limit=200.0, effort_limit=200.0,
velocity_limit=0.2, velocity_limit=0.2,
stiffness=1e5, stiffness=2e3,
damping=1e3, damping=1e2,
), ),
}, },
soft_joint_pos_limit_factor=1.0, soft_joint_pos_limit_factor=1.0,
......
...@@ -3,10 +3,7 @@ ...@@ -3,10 +3,7 @@
# #
# SPDX-License-Identifier: BSD-3-Clause # SPDX-License-Identifier: BSD-3-Clause
"""Sub-module containing utilities for various math operations. """Sub-module containing utilities for various math operations."""
Some of these are imported from the module `omni.isaac.core.utils.torch` for convenience.
"""
from __future__ import annotations from __future__ import annotations
...@@ -690,7 +687,7 @@ def is_identity_pose(pos: torch.tensor, rot: torch.tensor) -> bool: ...@@ -690,7 +687,7 @@ def is_identity_pose(pos: torch.tensor, rot: torch.tensor) -> bool:
# @torch.jit.script # @torch.jit.script
def combine_frame_transforms( def combine_frame_transforms(
t01: torch.Tensor, q01: torch.Tensor, t12: torch.Tensor = None, q12: torch.Tensor = None t01: torch.Tensor, q01: torch.Tensor, t12: torch.Tensor | None = None, q12: torch.Tensor | None = None
) -> tuple[torch.Tensor, torch.Tensor]: ) -> tuple[torch.Tensor, torch.Tensor]:
r"""Combine transformations between two reference frames into a stationary frame. r"""Combine transformations between two reference frames into a stationary frame.
...@@ -701,7 +698,9 @@ def combine_frame_transforms( ...@@ -701,7 +698,9 @@ def combine_frame_transforms(
t01: Position of frame 1 w.r.t. frame 0. Shape is (N, 3). t01: Position of frame 1 w.r.t. frame 0. Shape is (N, 3).
q01: Quaternion orientation of frame 1 w.r.t. frame 0 in (w, x, y, z). Shape is (N, 4). q01: Quaternion orientation of frame 1 w.r.t. frame 0 in (w, x, y, z). Shape is (N, 4).
t12: Position of frame 2 w.r.t. frame 1. Shape is (N, 3). t12: Position of frame 2 w.r.t. frame 1. Shape is (N, 3).
Defaults to None, in which case the position is assumed to be zero.
q12: Quaternion orientation of frame 2 w.r.t. frame 1 in (w, x, y, z). Shape is (N, 4). q12: Quaternion orientation of frame 2 w.r.t. frame 1 in (w, x, y, z). Shape is (N, 4).
Defaults to None, in which case the orientation is assumed to be identity.
Returns: Returns:
A tuple containing the position and orientation of frame 2 w.r.t. frame 0. A tuple containing the position and orientation of frame 2 w.r.t. frame 0.
...@@ -721,9 +720,9 @@ def combine_frame_transforms( ...@@ -721,9 +720,9 @@ def combine_frame_transforms(
return t02, q02 return t02, q02
@torch.jit.script # @torch.jit.script
def subtract_frame_transforms( def subtract_frame_transforms(
t01: torch.Tensor, q01: torch.Tensor, t02: torch.Tensor, q02: torch.Tensor t01: torch.Tensor, q01: torch.Tensor, t02: torch.Tensor | None = None, q02: torch.Tensor | None = None
) -> tuple[torch.Tensor, torch.Tensor]: ) -> tuple[torch.Tensor, torch.Tensor]:
r"""Subtract transformations between two reference frames into a stationary frame. r"""Subtract transformations between two reference frames into a stationary frame.
...@@ -734,7 +733,9 @@ def subtract_frame_transforms( ...@@ -734,7 +733,9 @@ def subtract_frame_transforms(
t01: Position of frame 1 w.r.t. frame 0. Shape is (N, 3). t01: Position of frame 1 w.r.t. frame 0. Shape is (N, 3).
q01: Quaternion orientation of frame 1 w.r.t. frame 0 in (w, x, y, z). Shape is (N, 4). q01: Quaternion orientation of frame 1 w.r.t. frame 0 in (w, x, y, z). Shape is (N, 4).
t02: Position of frame 2 w.r.t. frame 0. Shape is (N, 3). t02: Position of frame 2 w.r.t. frame 0. Shape is (N, 3).
Defaults to None, in which case the position is assumed to be zero.
q02: Quaternion orientation of frame 2 w.r.t. frame 0 in (w, x, y, z). Shape is (N, 4). q02: Quaternion orientation of frame 2 w.r.t. frame 0 in (w, x, y, z). Shape is (N, 4).
Defaults to None, in which case the orientation is assumed to be identity.
Returns: Returns:
A tuple containing the position and orientation of frame 2 w.r.t. frame 1. A tuple containing the position and orientation of frame 2 w.r.t. frame 1.
...@@ -742,10 +743,15 @@ def subtract_frame_transforms( ...@@ -742,10 +743,15 @@ def subtract_frame_transforms(
""" """
# compute orientation # compute orientation
q10 = quat_inv(q01) q10 = quat_inv(q01)
q12 = quat_mul(q10, q02) if q02 is not None:
q12 = quat_mul(q10, q02)
else:
q12 = q10
# compute translation # compute translation
t12 = quat_apply(q10, t02 - t01) if t02 is not None:
t12 = quat_apply(q10, t02 - t01)
else:
t12 = quat_apply(q10, -t01)
return t12, q12 return t12, q12
...@@ -816,7 +822,7 @@ def apply_delta_pose( ...@@ -816,7 +822,7 @@ def apply_delta_pose(
source_pos: Position of source frame. Shape is (N, 3). source_pos: Position of source frame. Shape is (N, 3).
source_rot: Quaternion orientation of source frame in (w, x, y, z). Shape is (N, 4).. source_rot: Quaternion orientation of source frame in (w, x, y, z). Shape is (N, 4)..
delta_pose: Position and orientation displacements. Shape is (N, 6). delta_pose: Position and orientation displacements. Shape is (N, 6).
eps: The tolerance to consider orientation displacement as zero. eps: The tolerance to consider orientation displacement as zero. Defaults to 1.0e-6.
Returns: Returns:
A tuple containing the displaced position and orientation frames. A tuple containing the displaced position and orientation frames.
...@@ -864,7 +870,9 @@ def transform_points( ...@@ -864,7 +870,9 @@ def transform_points(
Args: Args:
points: Points to transform. Shape is (N, P, 3) or (P, 3). points: Points to transform. Shape is (N, P, 3) or (P, 3).
pos: Position of the target frame. Shape is (N, 3) or (3,). pos: Position of the target frame. Shape is (N, 3) or (3,).
Defaults to None, in which case the position is assumed to be zero.
quat: Quaternion orientation of the target frame in (w, x, y, z). Shape is (N, 4) or (4,). quat: Quaternion orientation of the target frame in (w, x, y, z). Shape is (N, 4) or (4,).
Defaults to None, in which case the orientation is assumed to be identity.
Returns: Returns:
Transformed points in the target frame. Shape is (N, P, 3) or (P, 3). Transformed points in the target frame. Shape is (N, P, 3) or (P, 3).
......
...@@ -3,29 +3,7 @@ ...@@ -3,29 +3,7 @@
# #
# SPDX-License-Identifier: BSD-3-Clause # SPDX-License-Identifier: BSD-3-Clause
""" """Configurations for the object lift environments."""
Environment for lifting an object with fixed-base robot.
"""
import gymnasium as gym # We leave this file empty since we don't want to expose any configs in this package directly.
# We still need this file to import the "config" module in the parent package.
from . import agents
##
# Register Gym environments.
##
gym.register(
id="Isaac-Lift-Franka-v0",
entry_point="omni.isaac.orbit.envs:RLTaskEnv",
disable_env_checker=True,
kwargs={
"env_cfg_entry_point": f"{__name__}.lift_env_cfg:LiftEnvCfg",
"rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_ppo_cfg.yaml",
"rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:LIFT_RSL_RL_PPO_CFG",
"skrl_cfg_entry_point": f"{agents.__name__}:skrl_ppo_cfg.yaml",
"sb3_cfg_entry_point": f"{agents.__name__}:sb3_ppo_cfg.yaml",
"robomimic_bc_cfg_entry_point": f"{agents.__name__}.robomimic:bc.json",
"robomimic_bcq_cfg_entry_point": f"{agents.__name__}.robomimic:bcq.json",
},
)
# Copyright (c) 2022-2023, The ORBIT Project Developers.
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
"""Configurations for the object lift environments."""
# We leave this file empty since we don't want to expose any configs in this package directly.
# We still need this file to import the "config" module in the parent package.
# Copyright (c) 2022-2023, The ORBIT Project Developers.
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
import gymnasium as gym
from . import agents, env_cfg
##
# Register Gym environments.
##
gym.register(
id="Isaac-Lift-Cube-Franka-v0",
entry_point="omni.isaac.orbit.envs:RLTaskEnv",
kwargs={
"env_cfg_entry_point": env_cfg.FrankaCubeLiftEnvCfg,
"rsl_rl_cfg_entry_point": agents.rsl_rl_cfg.LiftCubePPORunnerCfg,
},
disable_env_checker=True,
)
gym.register(
id="Isaac-Lift-Cube-Franka-Play-v0",
entry_point="omni.isaac.orbit.envs:RLTaskEnv",
kwargs={
"env_cfg_entry_point": env_cfg.FrankaCubeLiftEnvCfg_PLAY,
"rsl_rl_cfg_entry_point": agents.rsl_rl_cfg.LiftCubePPORunnerCfg,
},
disable_env_checker=True,
)
# Copyright (c) 2022-2023, The ORBIT Project Developers.
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
from . import rsl_rl_cfg # noqa: F401, F403
...@@ -3,40 +3,39 @@ ...@@ -3,40 +3,39 @@
# #
# SPDX-License-Identifier: BSD-3-Clause # SPDX-License-Identifier: BSD-3-Clause
from omni.isaac.orbit.utils import configclass
from omni.isaac.orbit_tasks.utils.wrappers.rsl_rl import ( from omni.isaac.orbit_tasks.utils.wrappers.rsl_rl import (
RslRlOnPolicyRunnerCfg, RslRlOnPolicyRunnerCfg,
RslRlPpoActorCriticCfg, RslRlPpoActorCriticCfg,
RslRlPpoAlgorithmCfg, RslRlPpoAlgorithmCfg,
) )
LIFT_RSL_RL_PPO_CFG = RslRlOnPolicyRunnerCfg(
num_steps_per_env=96, @configclass
max_iterations=700, class LiftCubePPORunnerCfg(RslRlOnPolicyRunnerCfg):
save_interval=50, num_steps_per_env = 24
experiment_name="lift", max_iterations = 1500
run_name="", save_interval = 50
resume=False, experiment_name = "franka_lift"
load_run=-1, empirical_normalization = False
load_checkpoint=-1, policy = RslRlPpoActorCriticCfg(
empirical_normalization=False, init_noise_std=0.8,
policy=RslRlPpoActorCriticCfg(
init_noise_std=1.0,
actor_hidden_dims=[256, 128, 64], actor_hidden_dims=[256, 128, 64],
critic_hidden_dims=[256, 128, 64], critic_hidden_dims=[256, 128, 64],
activation="elu", activation="elu",
), )
algorithm=RslRlPpoAlgorithmCfg( algorithm = RslRlPpoAlgorithmCfg(
value_loss_coef=1.0, value_loss_coef=1.0,
use_clipped_value_loss=True, use_clipped_value_loss=True,
clip_param=0.2, clip_param=0.2,
entropy_coef=0.01, entropy_coef=0.006,
num_learning_epochs=5, num_learning_epochs=5,
num_mini_batches=4, num_mini_batches=4,
learning_rate=1.0e-3, learning_rate=1.0e-4,
schedule="adaptive", schedule="adaptive",
gamma=0.99, gamma=0.98,
lam=0.95, lam=0.95,
desired_kl=0.01, desired_kl=0.01,
max_grad_norm=1.0, max_grad_norm=1.0,
), )
)
# Copyright (c) 2022-2023, The ORBIT Project Developers.
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
from omni.isaac.orbit.assets import RigidObjectCfg
from omni.isaac.orbit.sensors import FrameTransformerCfg
from omni.isaac.orbit.sensors.frame_transformer.frame_transformer_cfg import OffsetCfg
from omni.isaac.orbit.sim.schemas.schemas_cfg import RigidBodyPropertiesCfg
from omni.isaac.orbit.sim.spawners.from_files.from_files_cfg import UsdFileCfg
from omni.isaac.orbit.utils import configclass
from omni.isaac.orbit.utils.assets import ISAAC_NUCLEUS_DIR
from omni.isaac.orbit_tasks.manipulation.lift import mdp
from omni.isaac.orbit_tasks.manipulation.lift.lift_env_cfg import LiftEnvCfg
##
# Pre-defined configs
##
# isort: off
from omni.isaac.orbit.assets.config.franka import FRANKA_PANDA_ARM_WITH_PANDA_HAND_CFG
from omni.isaac.orbit.markers.config import FRAME_MARKER_CFG
@configclass
class FrankaCubeLiftEnvCfg(LiftEnvCfg):
def __post_init__(self):
# post init of parent
super().__post_init__()
# Set Franka as robot
self.scene.robot = FRANKA_PANDA_ARM_WITH_PANDA_HAND_CFG.replace(prim_path="{ENV_REGEX_NS}/Robot")
# Set actions for the specific robot type (franka)
self.actions.body_joint_pos = mdp.JointPositionActionCfg(
asset_name="robot", joint_names=["panda_joint.*"], scale=1.0, use_default_offset=True
)
self.actions.finger_joint_pos = mdp.BinaryJointPositionActionCfg(
asset_name="robot",
joint_names=["panda_finger.*"],
open_command_expr={"panda_finger_.*": 0.3},
close_command_expr={"panda_finger_.*": 0.0},
)
# Set the body name for the end effector
self.commands.body_name = "panda_hand"
# Set Cube as object
self.scene.object = RigidObjectCfg(
prim_path="{ENV_REGEX_NS}/Object",
init_state=RigidObjectCfg.InitialStateCfg(pos=[0.5, 0, 0.055], rot=[1, 0, 0, 0]),
spawn=UsdFileCfg(
usd_path=f"{ISAAC_NUCLEUS_DIR}/Props/Blocks/DexCube/dex_cube_instanceable.usd",
scale=(0.8, 0.8, 0.8),
rigid_props=RigidBodyPropertiesCfg(
solver_position_iteration_count=16,
solver_velocity_iteration_count=1,
max_angular_velocity=1000.0,
max_linear_velocity=1000.0,
max_depenetration_velocity=5.0,
disable_gravity=False,
),
),
)
# Listens to the required transforms
marker_cfg = FRAME_MARKER_CFG.copy()
marker_cfg.markers["frame"].scale = (0.1, 0.1, 0.1)
marker_cfg.prim_path = "/Visuals/FrameTransformer"
self.scene.ee_frame = FrameTransformerCfg(
prim_path="{ENV_REGEX_NS}/Robot/panda_link0",
debug_vis=False,
visualizer_cfg=marker_cfg,
target_frames=[
FrameTransformerCfg.FrameCfg(
prim_path="{ENV_REGEX_NS}/Robot/panda_hand",
name="end_effector",
offset=OffsetCfg(
pos=[0.0, 0.0, 0.1034],
),
),
],
)
@configclass
class FrankaCubeLiftEnvCfg_PLAY(FrankaCubeLiftEnvCfg):
def __post_init__(self):
# post init of parent
super().__post_init__()
# make a smaller scene for play
self.scene.num_envs = 50
self.scene.env_spacing = 2.5
# disable randomization for play
self.observations.policy.enable_corruption = False
# Copyright (c) 2022-2023, The ORBIT Project Developers.
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
from __future__ import annotations
from omni.isaac.orbit.controllers.differential_inverse_kinematics import DifferentialInverseKinematicsCfg
from omni.isaac.orbit.objects import RigidObjectCfg
from omni.isaac.orbit.robots.config.franka import FRANKA_PANDA_ARM_WITH_PANDA_HAND_CFG
from omni.isaac.orbit.robots.single_arm import SingleArmManipulatorCfg
from omni.isaac.orbit.utils import configclass
from omni.isaac.orbit.utils.assets import ISAAC_NUCLEUS_DIR
from omni.isaac.orbit_tasks.isaac_env_cfg import EnvCfg, IsaacEnvCfg, PhysxCfg, SimCfg, ViewerCfg
##
# Scene settings
##
@configclass
class TableCfg:
"""Properties for the table."""
# note: we use instanceable asset since it consumes less memory
usd_path = f"{ISAAC_NUCLEUS_DIR}/Props/Mounts/SeattleLabTable/table_instanceable.usd"
@configclass
class ManipulationObjectCfg(RigidObjectCfg):
"""Properties for the object to manipulate in the scene."""
meta_info = RigidObjectCfg.MetaInfoCfg(
usd_path=f"{ISAAC_NUCLEUS_DIR}/Props/Blocks/DexCube/dex_cube_instanceable.usd",
scale=(0.8, 0.8, 0.8),
)
init_state = RigidObjectCfg.InitialStateCfg(
pos=(0.4, 0.0, 0.075), rot=(1.0, 0.0, 0.0, 0.0), lin_vel=(0.0, 0.0, 0.0), ang_vel=(0.0, 0.0, 0.0)
)
rigid_props = RigidObjectCfg.RigidBodyPropertiesCfg(
solver_position_iteration_count=16,
solver_velocity_iteration_count=1,
max_angular_velocity=1000.0,
max_linear_velocity=1000.0,
max_depenetration_velocity=5.0,
disable_gravity=False,
)
physics_material = RigidObjectCfg.PhysicsMaterialCfg(
static_friction=0.5, dynamic_friction=0.5, restitution=0.0, prim_path="/World/Materials/cubeMaterial"
)
@configclass
class GoalMarkerCfg:
"""Properties for visualization marker."""
# usd file to import
usd_path = f"{ISAAC_NUCLEUS_DIR}/Props/UIElements/frame_prim.usd"
# scale of the asset at import
scale = [0.05, 0.05, 0.05] # x,y,z
@configclass
class FrameMarkerCfg:
"""Properties for visualization marker."""
# usd file to import
usd_path = f"{ISAAC_NUCLEUS_DIR}/Props/UIElements/frame_prim.usd"
# scale of the asset at import
scale = [0.1, 0.1, 0.1] # x,y,z
##
# MDP settings
##
@configclass
class RandomizationCfg:
"""Randomization of scene at reset."""
@configclass
class ObjectInitialPoseCfg:
"""Randomization of object initial pose."""
# category
position_cat: str = "default" # randomize position: "default", "uniform"
orientation_cat: str = "default" # randomize position: "default", "uniform"
# randomize position
position_uniform_min = [0.4, -0.25, 0.075] # position (x,y,z)
position_uniform_max = [0.6, 0.25, 0.075] # position (x,y,z)
@configclass
class ObjectDesiredPoseCfg:
"""Randomization of object desired pose."""
# category
position_cat: str = "default" # randomize position: "default", "uniform"
orientation_cat: str = "default" # randomize position: "default", "uniform"
# randomize position
position_default = [0.5, 0.0, 0.5] # position default (x,y,z)
position_uniform_min = [0.4, -0.25, 0.25] # position (x,y,z)
position_uniform_max = [0.6, 0.25, 0.5] # position (x,y,z)
# randomize orientation
orientation_default = [1.0, 0.0, 0.0, 0.0] # orientation default
# initialize
object_initial_pose: ObjectInitialPoseCfg = ObjectInitialPoseCfg()
object_desired_pose: ObjectDesiredPoseCfg = ObjectDesiredPoseCfg()
@configclass
class ObservationsCfg:
"""Observation specifications for the MDP."""
@configclass
class PolicyCfg:
"""Observations for policy group."""
# global group settings
enable_corruption: bool = True
# observation terms
# -- joint state
arm_dof_pos = {"scale": 1.0}
# arm_dof_pos_scaled = {"scale": 1.0}
# arm_dof_vel = {"scale": 0.5, "noise": {"name": "uniform", "min": -0.01, "max": 0.01}}
tool_dof_pos_scaled = {"scale": 1.0}
# -- end effector state
tool_positions = {"scale": 1.0}
tool_orientations = {"scale": 1.0}
# -- object state
# object_positions = {"scale": 1.0}
# object_orientations = {"scale": 1.0}
object_relative_tool_positions = {"scale": 1.0}
# object_relative_tool_orientations = {"scale": 1.0}
# -- object desired state
object_desired_positions = {"scale": 1.0}
# -- previous action
arm_actions = {"scale": 1.0}
tool_actions = {"scale": 1.0}
# global observation settings
return_dict_obs_in_group = False
"""Whether to return observations as dictionary or flattened vector within groups."""
# observation groups
policy: PolicyCfg = PolicyCfg()
@configclass
class RewardsCfg:
"""Reward terms for the MDP."""
# -- robot-centric
# reaching_object_position_l2 = {"weight": 0.0}
# reaching_object_position_exp = {"weight": 2.5, "sigma": 0.25}
reaching_object_position_tanh = {"weight": 2.5, "sigma": 0.1}
# penalizing_arm_dof_velocity_l2 = {"weight": 1e-5}
# penalizing_tool_dof_velocity_l2 = {"weight": 1e-5}
# penalizing_robot_dof_acceleration_l2 = {"weight": 1e-7}
# -- action-centric
penalizing_arm_action_rate_l2 = {"weight": 1e-2}
# penalizing_tool_action_l2 = {"weight": 1e-2}
# -- object-centric
# tracking_object_position_exp = {"weight": 5.0, "sigma": 0.25, "threshold": 0.08}
tracking_object_position_tanh = {"weight": 5.0, "sigma": 0.2, "threshold": 0.08}
lifting_object_success = {"weight": 3.5, "threshold": 0.08}
@configclass
class TerminationsCfg:
"""Termination terms for the MDP."""
episode_timeout = True # reset when episode length ended
object_falling = True # reset when object falls off the table
is_success = False # reset when object is lifted
@configclass
class ControlCfg:
"""Processing of MDP actions."""
# action space
control_type = "default" # "default", "inverse_kinematics"
# decimation: Number of control action updates @ sim dt per policy dt
decimation = 2
# configuration loaded when control_type == "inverse_kinematics"
inverse_kinematics: DifferentialInverseKinematicsCfg = DifferentialInverseKinematicsCfg(
command_type="pose_rel",
ik_method="dls",
position_command_scale=(0.1, 0.1, 0.1),
rotation_command_scale=(0.1, 0.1, 0.1),
)
##
# Environment configuration
##
@configclass
class LiftEnvCfg(IsaacEnvCfg):
"""Configuration for the Lift environment."""
# General Settings
env: EnvCfg = EnvCfg(num_envs=4096, env_spacing=2.5, episode_length_s=5.0)
viewer: ViewerCfg = ViewerCfg(debug_vis=True, eye=(7.5, 7.5, 7.5), lookat=(0.0, 0.0, 0.0))
# Physics settings
sim: SimCfg = SimCfg(
dt=0.01,
substeps=1,
physx=PhysxCfg(
gpu_found_lost_aggregate_pairs_capacity=1024 * 1024 * 4,
gpu_total_aggregate_pairs_capacity=16 * 1024,
friction_correlation_distance=0.00625,
friction_offset_threshold=0.01,
bounce_threshold_velocity=0.2,
),
)
# Scene Settings
# -- robot
robot: SingleArmManipulatorCfg = FRANKA_PANDA_ARM_WITH_PANDA_HAND_CFG
# -- object
object: ManipulationObjectCfg = ManipulationObjectCfg()
# -- table
table: TableCfg = TableCfg()
# -- visualization marker
goal_marker: GoalMarkerCfg = GoalMarkerCfg()
frame_marker: FrameMarkerCfg = FrameMarkerCfg()
# MDP settings
randomization: RandomizationCfg = RandomizationCfg()
observations: ObservationsCfg = ObservationsCfg()
rewards: RewardsCfg = RewardsCfg()
terminations: TerminationsCfg = TerminationsCfg()
# Controller settings
control: ControlCfg = ControlCfg()
# Copyright (c) 2022-2023, The ORBIT Project Developers.
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
# Copyright (c) 2022-2023, The ORBIT Project Developers.
# All rights reserved.
# SPDX-License-Identifier: BSD-3-Clause
from __future__ import annotations
from dataclasses import MISSING
import omni.isaac.orbit.sim as sim_utils
from omni.isaac.orbit.assets import ArticulationCfg, AssetBaseCfg, RigidObjectCfg
from omni.isaac.orbit.command_generators.command_generator_cfg import UniformPoseCommandGeneratorCfg
from omni.isaac.orbit.envs import RLTaskEnvCfg
from omni.isaac.orbit.managers import CurriculumTermCfg as CurrTerm
from omni.isaac.orbit.managers import ObservationGroupCfg as ObsGroup
from omni.isaac.orbit.managers import ObservationTermCfg as ObsTerm
from omni.isaac.orbit.managers import RandomizationTermCfg as RandTerm
from omni.isaac.orbit.managers import RewardTermCfg as RewTerm
from omni.isaac.orbit.managers import SceneEntityCfg
from omni.isaac.orbit.managers import TerminationTermCfg as DoneTerm
from omni.isaac.orbit.scene import InteractiveSceneCfg
from omni.isaac.orbit.sensors.frame_transformer.frame_transformer_cfg import FrameTransformerCfg
from omni.isaac.orbit.sim.spawners.from_files.from_files_cfg import GroundPlaneCfg, UsdFileCfg
from omni.isaac.orbit.utils import configclass
from omni.isaac.orbit.utils.assets import ISAAC_NUCLEUS_DIR
from . import mdp
##
# Scene definition
##
@configclass
class ObjectTableSceneCfg(InteractiveSceneCfg):
"""Configuration for the lift scene with a robot and a object.
This is the abstract base implementation, the exact scene is defined in the derived classes
which need to set the target object, robot and end-effector frames
"""
# robots: will be populated by agent env cfg
robot: ArticulationCfg = MISSING
# end-effector sensor: will be populated by agent env cfg
ee_frame: FrameTransformerCfg = MISSING
# target object: will be populated by agent env cfg
object: RigidObjectCfg = MISSING
# Table
table = AssetBaseCfg(
prim_path="{ENV_REGEX_NS}/Table",
init_state=AssetBaseCfg.InitialStateCfg(pos=[0.5, 0, 0], rot=[0.707, 0, 0, 0.707]),
spawn=UsdFileCfg(usd_path=f"{ISAAC_NUCLEUS_DIR}/Props/Mounts/SeattleLabTable/table_instanceable.usd"),
)
# plane
plane = AssetBaseCfg(
prim_path="/World/GroundPlane",
init_state=AssetBaseCfg.InitialStateCfg(pos=[0, 0, -1.05]),
spawn=GroundPlaneCfg(),
)
# lights
light = AssetBaseCfg(
prim_path="/World/light",
spawn=sim_utils.DomeLightCfg(color=(0.75, 0.75, 0.75), intensity=3000.0),
)
##
# MDP settings
##
@configclass
class ActionsCfg:
"""Action specifications for the MDP."""
# will be set by agent env cfg
body_joint_pos: mdp.JointPositionActionCfg = MISSING
finger_joint_pos: mdp.BinaryJointPositionActionCfg = MISSING
@configclass
class ObservationsCfg:
"""Observation specifications for the MDP."""
@configclass
class PolicyCfg(ObsGroup):
"""Observations for policy group."""
joint_pos = ObsTerm(func=mdp.joint_pos_rel)
joint_vel = ObsTerm(func=mdp.joint_vel_rel)
object_position = ObsTerm(func=mdp.object_position_in_robot_root_frame)
target_object_position = ObsTerm(func=mdp.generated_commands)
actions = ObsTerm(func=mdp.last_action)
def __post_init__(self):
self.enable_corruption = True
self.concatenate_terms = True
# observation groups
policy: PolicyCfg = PolicyCfg()
@configclass
class RandomizationCfg:
"""Configuration for randomization."""
reset_all = RandTerm(func=mdp.reset_scene_to_default, mode="reset")
reset_object_position = RandTerm(
func=mdp.reset_root_state_uniform,
mode="reset",
params={
"pose_range": {"x": (-0.1, 0.1), "y": (-0.25, 0.25), "z": (0.0, 0.0)},
"velocity_range": {},
"asset_cfg": SceneEntityCfg("object", body_names="Object"),
},
)
@configclass
class RewardsCfg:
"""Reward terms for the MDP."""
reaching_object = RewTerm(func=mdp.object_ee_distance, params={"std": 0.1}, weight=1.0)
lifting_object = RewTerm(func=mdp.object_is_lifted, params={"minimal_height": 0.06}, weight=15.0)
object_goal_tracking = RewTerm(
func=mdp.object_goal_distance,
params={"std": 0.3, "minimal_height": 0.06},
weight=16.0,
)
object_goal_tracking_fine_grained = RewTerm(
func=mdp.object_goal_distance,
params={"std": 0.05, "minimal_height": 0.06},
weight=5.0,
)
# action penalty
action_rate = RewTerm(func=mdp.action_rate_l2, weight=-1e-3)
joint_vel = RewTerm(
func=mdp.joint_vel_l2,
weight=-1e-4,
params={"asset_cfg": SceneEntityCfg("robot")},
)
@configclass
class TerminationsCfg:
"""Termination terms for the MDP."""
time_out = DoneTerm(func=mdp.time_out, time_out=True)
object_dropping = DoneTerm(
func=mdp.base_height, params={"minimum_height": -0.05, "asset_cfg": SceneEntityCfg("object")}
)
@configclass
class CurriculumCfg:
"""Curriculum terms for the MDP."""
action_rate = CurrTerm(
func=mdp.modify_reward_weight, params={"term_name": "action_rate", "weight": -1e-1, "num_steps": 10000}
)
joint_vel = CurrTerm(
func=mdp.modify_reward_weight, params={"term_name": "joint_vel", "weight": -1e-1, "num_steps": 10000}
)
##
# Environment configuration
##
@configclass
class LiftEnvCfg(RLTaskEnvCfg):
"""Configuration for the lifting environment."""
# Scene settings
scene: ObjectTableSceneCfg = ObjectTableSceneCfg(num_envs=4096, env_spacing=2.5, replicate_physics=False)
# Basic settings
observations: ObservationsCfg = ObservationsCfg()
actions: ActionsCfg = ActionsCfg()
commands: UniformPoseCommandGeneratorCfg = UniformPoseCommandGeneratorCfg(
asset_name="robot",
body_name=MISSING, # will be set by agent env cfg
resampling_time_range=(5.0, 5.0),
debug_vis=True,
ranges=UniformPoseCommandGeneratorCfg.Ranges(
pos_x=(0.4, 0.6), pos_y=(-0.25, 0.25), pos_z=(0.25, 0.5), roll=(0.0, 0.0), pitch=(0.0, 0.0), yaw=(0.0, 0.0)
),
)
# MDP settings
rewards: RewardsCfg = RewardsCfg()
terminations: TerminationsCfg = TerminationsCfg()
randomization: RandomizationCfg = RandomizationCfg()
curriculum: CurriculumCfg = CurriculumCfg()
def __post_init__(self):
"""Post initialization."""
# general settings
self.decimation = 2
self.episode_length_s = 5.0
# simulation settings
self.sim.dt = 0.01 # 100Hz
self.sim.physx.bounce_threshold_velocity = 0.2
self.sim.physx.bounce_threshold_velocity = 0.01
self.sim.physx.gpu_found_lost_aggregate_pairs_capacity = 1024 * 1024 * 4
self.sim.physx.gpu_total_aggregate_pairs_capacity = 16 * 1024
self.sim.physx.friction_correlation_distance = 0.00625
# Copyright (c) 2022-2023, The ORBIT Project Developers.
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
"""This sub-module contains the functions that are specific to the lift environments."""
from omni.isaac.orbit.envs.mdp import * # noqa: F401, F403
from .observations import * # noqa: F401, F403
from .rewards import * # noqa: F401, F403
# Copyright (c) 2022-2023, The ORBIT Project Developers.
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
from __future__ import annotations
import torch
from typing import TYPE_CHECKING
from omni.isaac.orbit.assets import RigidObject
from omni.isaac.orbit.managers import SceneEntityCfg
from omni.isaac.orbit.utils.math import subtract_frame_transforms
if TYPE_CHECKING:
from omni.isaac.orbit.envs import RLTaskEnv
def object_position_in_robot_root_frame(
env: RLTaskEnv,
robot_cfg: SceneEntityCfg = SceneEntityCfg("robot"),
object_cfg: SceneEntityCfg = SceneEntityCfg("object"),
) -> torch.Tensor:
"""The position of the object in the robot's root frame."""
robot: RigidObject = env.scene[robot_cfg.name]
object: RigidObject = env.scene[object_cfg.name]
object_pos_w = object.data.root_pos_w[:, :3]
object_pos_b, _ = subtract_frame_transforms(
robot.data.root_state_w[:, :3], robot.data.root_state_w[:, 3:7], object_pos_w
)
return object_pos_b
# Copyright (c) 2022-2023, The ORBIT Project Developers.
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
from __future__ import annotations
import torch
from typing import TYPE_CHECKING
from omni.isaac.orbit.assets import RigidObject
from omni.isaac.orbit.command_generators import UniformPoseCommandGenerator
from omni.isaac.orbit.managers import SceneEntityCfg
from omni.isaac.orbit.sensors import FrameTransformer
from omni.isaac.orbit.utils.math import combine_frame_transforms
if TYPE_CHECKING:
from omni.isaac.orbit.envs import RLTaskEnv
def object_is_lifted(
env: RLTaskEnv, minimal_height: float, object_cfg: SceneEntityCfg = SceneEntityCfg("object")
) -> torch.Tensor:
"""Reward the agent for lifting the object above the minimal height."""
object: RigidObject = env.scene[object_cfg.name]
return torch.where(object.data.root_pos_w[:, 2] > minimal_height, 1.0, 0.0)
def object_ee_distance(
env: RLTaskEnv,
std: float,
object_cfg: SceneEntityCfg = SceneEntityCfg("object"),
ee_frame_cfg: SceneEntityCfg = SceneEntityCfg("ee_frame"),
) -> torch.Tensor:
"""Reward the agent for reaching the object using tanh-kernel."""
# Target object position: (num_envs, 3)
object: RigidObject = env.scene[object_cfg.name]
ee_frame: FrameTransformer = env.scene[ee_frame_cfg.name]
cube_pos_w = object.data.root_pos_w
# End-effector position: (num_envs, 3)
ee_w = ee_frame.data.target_pos_w[..., 0, :]
# Distance of the end-effector to the object: (num_envs,)
object_ee_distance = torch.norm(cube_pos_w - ee_w, dim=1)
return 1 - torch.tanh(object_ee_distance / std)
def object_goal_distance(
env: RLTaskEnv,
std: float,
minimal_height: float,
robot_cfg: SceneEntityCfg = SceneEntityCfg("robot"),
object_cfg: SceneEntityCfg = SceneEntityCfg("object"),
) -> torch.Tensor:
"""Reward the agent for tracking the goal pose using tanh-kernel."""
robot: RigidObject = env.scene[robot_cfg.name]
object: RigidObject = env.scene[object_cfg.name]
command_manager: UniformPoseCommandGenerator = env.command_manager
des_pos_b = command_manager.command[:, :3]
des_pos_w, _ = combine_frame_transforms(robot.data.root_state_w[:, :3], robot.data.root_state_w[:, 3:7], des_pos_b)
# distance of the end-effector to the object: (num_envs,)
distance = torch.norm(des_pos_w - object.data.root_pos_w[:, :3], dim=1)
# rewarded if the object is lifted above the threshold
return (object.data.root_pos_w[:, 2] > minimal_height) * (1 - torch.tanh(distance / std))
...@@ -5,7 +5,7 @@ ...@@ -5,7 +5,7 @@
import gymnasium as gym import gymnasium as gym
from . import agents, joint_env_cfg from . import agents, env_cfg
## ##
# Register Gym environments. # Register Gym environments.
...@@ -16,9 +16,9 @@ gym.register( ...@@ -16,9 +16,9 @@ gym.register(
entry_point="omni.isaac.orbit.envs:RLTaskEnv", entry_point="omni.isaac.orbit.envs:RLTaskEnv",
disable_env_checker=True, disable_env_checker=True,
kwargs={ kwargs={
"env_cfg_entry_point": joint_env_cfg.FrankaReachEnvCfg, "env_cfg_entry_point": env_cfg.FrankaReachEnvCfg,
"rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_ppo_cfg.yaml", "rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_ppo_cfg.yaml",
"rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:FrankaReachPPORunnerCfg", "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_cfg:FrankaReachPPORunnerCfg",
}, },
) )
...@@ -27,8 +27,8 @@ gym.register( ...@@ -27,8 +27,8 @@ gym.register(
entry_point="omni.isaac.orbit.envs:RLTaskEnv", entry_point="omni.isaac.orbit.envs:RLTaskEnv",
disable_env_checker=True, disable_env_checker=True,
kwargs={ kwargs={
"env_cfg_entry_point": joint_env_cfg.FrankaReachEnvCfg_PLAY, "env_cfg_entry_point": env_cfg.FrankaReachEnvCfg_PLAY,
"rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_ppo_cfg.yaml", "rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_ppo_cfg.yaml",
"rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:FrankaReachPPORunnerCfg", "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_cfg:FrankaReachPPORunnerCfg",
}, },
) )
...@@ -17,7 +17,7 @@ class FrankaReachPPORunnerCfg(RslRlOnPolicyRunnerCfg): ...@@ -17,7 +17,7 @@ class FrankaReachPPORunnerCfg(RslRlOnPolicyRunnerCfg):
num_steps_per_env = 24 num_steps_per_env = 24
max_iterations = 1000 max_iterations = 1000
save_interval = 50 save_interval = 50
experiment_name = "reach_franka" experiment_name = "franka_reach"
run_name = "" run_name = ""
resume = False resume = False
empirical_normalization = False empirical_normalization = False
......
...@@ -5,7 +5,7 @@ ...@@ -5,7 +5,7 @@
import gymnasium as gym import gymnasium as gym
from . import agents, joint_env_cfg from . import agents, env_cfg
## ##
# Register Gym environments. # Register Gym environments.
...@@ -16,7 +16,7 @@ gym.register( ...@@ -16,7 +16,7 @@ gym.register(
entry_point="omni.isaac.orbit.envs:RLTaskEnv", entry_point="omni.isaac.orbit.envs:RLTaskEnv",
disable_env_checker=True, disable_env_checker=True,
kwargs={ kwargs={
"env_cfg_entry_point": joint_env_cfg.UR10ReachEnvCfg, "env_cfg_entry_point": env_cfg.UR10ReachEnvCfg,
"rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_ppo_cfg.yaml", "rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_ppo_cfg.yaml",
"rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:UR10ReachPPORunnerCfg", "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:UR10ReachPPORunnerCfg",
}, },
...@@ -27,7 +27,7 @@ gym.register( ...@@ -27,7 +27,7 @@ gym.register(
entry_point="omni.isaac.orbit.envs:RLTaskEnv", entry_point="omni.isaac.orbit.envs:RLTaskEnv",
disable_env_checker=True, disable_env_checker=True,
kwargs={ kwargs={
"env_cfg_entry_point": joint_env_cfg.UR10ReachEnvCfg_PLAY, "env_cfg_entry_point": env_cfg.UR10ReachEnvCfg_PLAY,
"rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_ppo_cfg.yaml", "rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_ppo_cfg.yaml",
"rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:UR10ReachPPORunnerCfg", "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:UR10ReachPPORunnerCfg",
}, },
......
...@@ -126,7 +126,7 @@ class RewardsCfg: ...@@ -126,7 +126,7 @@ class RewardsCfg:
) )
# action penalty # action penalty
action_rate = RewTerm(func=mdp.action_rate_l2, weight=-0.005) action_rate = RewTerm(func=mdp.action_rate_l2, weight=-0.0001)
joint_vel = RewTerm( joint_vel = RewTerm(
func=mdp.joint_vel_l2, func=mdp.joint_vel_l2,
weight=-0.0001, weight=-0.0001,
...@@ -146,11 +146,7 @@ class CurriculumCfg: ...@@ -146,11 +146,7 @@ class CurriculumCfg:
"""Curriculum terms for the MDP.""" """Curriculum terms for the MDP."""
action_rate = CurrTerm( action_rate = CurrTerm(
func=mdp.modify_reward_weight, params={"term_name": "action_rate", "weight": -0.01, "num_steps": 4500} func=mdp.modify_reward_weight, params={"term_name": "action_rate", "weight": -0.005, "num_steps": 4500}
)
joint_vel = CurrTerm(
func=mdp.modify_reward_weight, params={"term_name": "joint_vel", "weight": -0.0001, "num_steps": 4500}
) )
...@@ -172,7 +168,7 @@ class ReachEnvCfg(RLTaskEnvCfg): ...@@ -172,7 +168,7 @@ class ReachEnvCfg(RLTaskEnvCfg):
asset_name="robot", asset_name="robot",
body_name=MISSING, body_name=MISSING,
resampling_time_range=(4.0, 4.0), resampling_time_range=(4.0, 4.0),
debug_vis=False, debug_vis=True,
ranges=UniformPoseCommandGeneratorCfg.Ranges( ranges=UniformPoseCommandGeneratorCfg.Ranges(
pos_x=(0.35, 0.65), pos_x=(0.35, 0.65),
pos_y=(-0.2, 0.2), pos_y=(-0.2, 0.2),
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment