Unverified Commit 48146c2b authored by Mayank Mittal's avatar Mayank Mittal Committed by GitHub

Fixes MuJoCo Humanoid and Ant environments (#252)

# Description

This MR fixes the MuJoCo-style Humanoid and Ant environments to use the new structure. The MR also includes some fixes in the core APIs.

## Type of change

- Bug fix (non-breaking change which fixes an issue)
- New feature (non-breaking change which adds functionality)

## Checklist

- [x] I have run the [`pre-commit` checks](https://pre-commit.com/) with `./orbit.sh --format`
- [ ] I have made corresponding changes to the documentation
- [x] My changes generate no new warnings
- [ ] I have added tests that prove my fix is effective or that my feature works
- [x] I have updated the changelog and the corresponding version in the extension's `config/extension.toml` file
- [x] I have added my name to the `CONTRIBUTORS.md` or my name already exists there
parent 4565902f
[package] [package]
# Note: Semantic Versioning is used: https://semver.org/ # Note: Semantic Versioning is used: https://semver.org/
version = "0.9.51" version = "0.9.52"
# Description # Description
title = "ORBIT framework for Robot Learning" title = "ORBIT framework for Robot Learning"
......
Changelog Changelog
--------- ---------
0.9.52 (2023-11-29)
~~~~~~~~~~~~~~~~~~~
Changed
^^^^^^^
* Changed the warning print in :meth:`omni.isaac.orbit.sim.utils.apply_nested` method
to be more descriptive. Earlier, it was printing a warning for every instanced prim.
Now, it only prints a warning if it could not apply the attribute to any of the prims.
Added
^^^^^
* Added the method :meth:`omni.isaac.orbit.utils.assets.retrieve_file_path` to
obtain the absolute path of a file on the Nucleus server or locally.
Fixed
^^^^^
* Fixed hiding of STOP button in the :class:`AppLauncher` class when running the
simulation in headless mode.
* Fixed a bug with :meth:`omni.isaac.orbit.sim.utils.clone` failing when the input prim path
had no parent (example: "/Table").
0.9.51 (2023-11-29) 0.9.51 (2023-11-29)
~~~~~~~~~~~~~~~~~~~ ~~~~~~~~~~~~~~~~~~~
......
...@@ -208,6 +208,8 @@ class AppLauncher: ...@@ -208,6 +208,8 @@ class AppLauncher:
self._create_app() self._create_app()
# Load IsaacSim extensions # Load IsaacSim extensions
self._load_extensions() self._load_extensions()
# Hide the stop button in the toolbar
self._hide_stop_button()
""" """
Properties. Properties.
...@@ -561,8 +563,6 @@ class AppLauncher: ...@@ -561,8 +563,6 @@ class AppLauncher:
# add orbit modules back to sys.modules # add orbit modules back to sys.modules
for key, value in hacked_modules.items(): for key, value in hacked_modules.items():
sys.modules[key] = value sys.modules[key] = value
# hide the stop button in the toolbar
self._hide_stop_button()
def _load_extensions(self): def _load_extensions(self):
"""Load correct extensions based on AppLauncher's resolved config member variables.""" """Load correct extensions based on AppLauncher's resolved config member variables."""
...@@ -648,13 +648,20 @@ class AppLauncher: ...@@ -648,13 +648,20 @@ class AppLauncher:
) )
def _hide_stop_button(self): def _hide_stop_button(self):
"""Hide the stop button in the toolbar.""" """Hide the stop button in the toolbar.
import omni.kit.widget.toolbar
For standalone executions, having a stop button is confusing since it invalidates the whole simulation.
# grey out the stop button because we don't want to stop the simulation manually in standalone mode Thus, we hide the button so that users don't accidentally click it.
toolbar = omni.kit.widget.toolbar.get_instance() """
play_button_group = toolbar._builtin_tools._play_button_group # type: ignore # when we are truly headless, then we can't import the widget toolbar
if play_button_group is not None: # thus, we only hide the stop button when we are not headless (i.e. GUI is enabled)
play_button_group._stop_button.visible = False # type: ignore if self._livestream >= 1 or not self._headless:
play_button_group._stop_button.enabled = False # type: ignore import omni.kit.widget.toolbar
play_button_group._stop_button = None # type: ignore
# grey out the stop button because we don't want to stop the simulation manually in standalone mode
toolbar = omni.kit.widget.toolbar.get_instance()
play_button_group = toolbar._builtin_tools._play_button_group # type: ignore
if play_button_group is not None:
play_button_group._stop_button.visible = False # type: ignore
play_button_group._stop_button.enabled = False # type: ignore
play_button_group._stop_button = None # type: ignore
...@@ -75,7 +75,7 @@ class JointAction(ActionTerm): ...@@ -75,7 +75,7 @@ class JointAction(ActionTerm):
if isinstance(cfg.scale, (float, int)): if isinstance(cfg.scale, (float, int)):
self._scale = float(cfg.scale) self._scale = float(cfg.scale)
elif isinstance(cfg.scale, dict): elif isinstance(cfg.scale, dict):
self._scale = torch.ones(1.0, self.action_dim, device=self.device) self._scale = torch.ones(self.num_envs, self.action_dim, device=self.device)
# resolve the dictionary config # resolve the dictionary config
index_list, _, value_list = string_utils.resolve_matching_names_values(self.cfg.scale, self._joint_names) index_list, _, value_list = string_utils.resolve_matching_names_values(self.cfg.scale, self._joint_names)
self._scale[:, index_list] = torch.tensor(value_list, device=self.device) self._scale[:, index_list] = torch.tensor(value_list, device=self.device)
......
...@@ -14,6 +14,7 @@ from __future__ import annotations ...@@ -14,6 +14,7 @@ from __future__ import annotations
import torch import torch
from typing import TYPE_CHECKING from typing import TYPE_CHECKING
import omni.isaac.orbit.utils.math as math_utils
from omni.isaac.orbit.assets import Articulation, RigidObject from omni.isaac.orbit.assets import Articulation, RigidObject
from omni.isaac.orbit.managers import SceneEntityCfg from omni.isaac.orbit.managers import SceneEntityCfg
from omni.isaac.orbit.sensors import RayCaster from omni.isaac.orbit.sensors import RayCaster
...@@ -26,6 +27,13 @@ Root state. ...@@ -26,6 +27,13 @@ Root state.
""" """
def base_pos_z(env: BaseEnv, asset_cfg: SceneEntityCfg = SceneEntityCfg("robot")) -> torch.Tensor:
"""Root height in the simulation world frame."""
# extract the used quantities (to enable type-hinting)
asset: Articulation = env.scene[asset_cfg.name]
return asset.data.root_pos_w[:, 2].unsqueeze(-1)
def base_lin_vel(env: BaseEnv, asset_cfg: SceneEntityCfg = SceneEntityCfg("robot")) -> torch.Tensor: def base_lin_vel(env: BaseEnv, asset_cfg: SceneEntityCfg = SceneEntityCfg("robot")) -> torch.Tensor:
"""Root linear velocity in the asset's root frame.""" """Root linear velocity in the asset's root frame."""
# extract the used quantities (to enable type-hinting) # extract the used quantities (to enable type-hinting)
...@@ -59,6 +67,15 @@ def joint_pos_rel(env: BaseEnv, asset_cfg: SceneEntityCfg = SceneEntityCfg("robo ...@@ -59,6 +67,15 @@ def joint_pos_rel(env: BaseEnv, asset_cfg: SceneEntityCfg = SceneEntityCfg("robo
return asset.data.joint_pos - asset.data.default_joint_pos return asset.data.joint_pos - asset.data.default_joint_pos
def joint_pos_norm(env: BaseEnv, asset_cfg: SceneEntityCfg = SceneEntityCfg("robot")) -> torch.Tensor:
"""The joint positions of the asset normalized with the asset's joint limits."""
# extract the used quantities (to enable type-hinting)
asset: Articulation = env.scene[asset_cfg.name]
return math_utils.scale_transform(
asset.data.joint_pos, asset.data.soft_joint_pos_limits[..., 0], asset.data.soft_joint_pos_limits[..., 1]
)
def joint_vel_rel(env: BaseEnv, asset_cfg: SceneEntityCfg = SceneEntityCfg("robot")): def joint_vel_rel(env: BaseEnv, asset_cfg: SceneEntityCfg = SceneEntityCfg("robot")):
"""The joint velocities of the asset w.r.t. the default joint velocities.""" """The joint velocities of the asset w.r.t. the default joint velocities."""
# extract the used quantities (to enable type-hinting) # extract the used quantities (to enable type-hinting)
...@@ -79,6 +96,18 @@ def height_scan(env: BaseEnv, sensor_cfg: SceneEntityCfg) -> torch.Tensor: ...@@ -79,6 +96,18 @@ def height_scan(env: BaseEnv, sensor_cfg: SceneEntityCfg) -> torch.Tensor:
return sensor.data.pos_w[:, 2].unsqueeze(1) - sensor.data.ray_hits_w[..., 2] - 0.5 return sensor.data.pos_w[:, 2].unsqueeze(1) - sensor.data.ray_hits_w[..., 2] - 0.5
def body_incoming_wrench(env: BaseEnv, asset_cfg: SceneEntityCfg) -> torch.Tensor:
"""Incoming spatial wrench on bodies of an articulation in the simulation world frame.
This is the 6-D wrench (force and torque) applied to the body link by the incoming joint force.
"""
# extract the used quantities (to enable type-hinting)
asset: Articulation = env.scene[asset_cfg.name]
# obtain the link incoming forces in world frame
link_incoming_forces = asset.root_physx_view.get_link_incoming_joint_force()[:, asset_cfg.body_ids]
return link_incoming_forces.view(env.num_envs, -1)
""" """
Actions. Actions.
""" """
......
...@@ -240,6 +240,9 @@ def reset_joints_by_scale( ...@@ -240,6 +240,9 @@ def reset_joints_by_scale(
# scale these values randomly # scale these values randomly
joint_pos *= sample_uniform(*position_range, joint_pos.shape, joint_pos.device) joint_pos *= sample_uniform(*position_range, joint_pos.shape, joint_pos.device)
joint_vel *= sample_uniform(*velocity_range, joint_vel.shape, joint_vel.device) joint_vel *= sample_uniform(*velocity_range, joint_vel.shape, joint_vel.device)
# clamp joint pos to limits
joint_pos_limits = asset.data.soft_joint_pos_limits[env_ids]
joint_pos = joint_pos.clamp_(joint_pos_limits[..., 0], joint_pos_limits[..., 1])
# set into the physics simulation # set into the physics simulation
asset.write_joint_state_to_sim(joint_pos, joint_vel, env_ids=env_ids) asset.write_joint_state_to_sim(joint_pos, joint_vel, env_ids=env_ids)
...@@ -266,6 +269,9 @@ def reset_joints_by_offset( ...@@ -266,6 +269,9 @@ def reset_joints_by_offset(
# bias these values randomly # bias these values randomly
joint_pos += sample_uniform(*position_range, joint_pos.shape, joint_pos.device) joint_pos += sample_uniform(*position_range, joint_pos.shape, joint_pos.device)
joint_vel += sample_uniform(*velocity_range, joint_vel.shape, joint_vel.device) joint_vel += sample_uniform(*velocity_range, joint_vel.shape, joint_vel.device)
# clamp joint pos to limits
joint_pos_limits = asset.data.soft_joint_pos_limits[env_ids]
joint_pos = joint_pos.clamp_(joint_pos_limits[..., 0], joint_pos_limits[..., 1])
# set into the physics simulation # set into the physics simulation
asset.write_joint_state_to_sim(joint_pos, joint_vel, env_ids=env_ids) asset.write_joint_state_to_sim(joint_pos, joint_vel, env_ids=env_ids)
...@@ -273,15 +279,20 @@ def reset_joints_by_offset( ...@@ -273,15 +279,20 @@ def reset_joints_by_offset(
def reset_scene_to_default(env: BaseEnv, env_ids: torch.Tensor): def reset_scene_to_default(env: BaseEnv, env_ids: torch.Tensor):
"""Reset the scene to the default state specified in the scene configuration.""" """Reset the scene to the default state specified in the scene configuration."""
# root states # rigid bodies
for rigid_object in env.scene.rigid_objects.values() + env.scene.articulations.values(): for rigid_object in env.scene.rigid_objects.values():
# obtain default and deal with the offset for env origins # obtain default and deal with the offset for env origins
default_root_state = rigid_object.data.default_root_state[env_ids].clone() default_root_state = rigid_object.data.default_root_state[env_ids].clone()
default_root_state[:, 0:3] += env.scene.env_origins[env_ids] default_root_state[:, 0:3] += env.scene.env_origins[env_ids]
# set into the physics simulation # set into the physics simulation
rigid_object.write_root_state_to_sim(default_root_state, env_ids=env_ids) rigid_object.write_root_state_to_sim(default_root_state, env_ids=env_ids)
# joint states # articulations
for articulation_asset in env.scene.articulations.values(): for articulation_asset in env.scene.articulations.values():
# obtain default and deal with the offset for env origins
default_root_state = articulation_asset.data.default_root_state[env_ids].clone()
default_root_state[:, 0:3] += env.scene.env_origins[env_ids]
# set into the physics simulation
articulation_asset.write_root_state_to_sim(default_root_state, env_ids=env_ids)
# obtain default joint positions # obtain default joint positions
default_joint_pos = articulation_asset.data.default_joint_pos[env_ids].clone() default_joint_pos = articulation_asset.data.default_joint_pos[env_ids].clone()
default_joint_vel = articulation_asset.data.default_joint_vel[env_ids].clone() default_joint_vel = articulation_asset.data.default_joint_vel[env_ids].clone()
......
...@@ -26,14 +26,14 @@ General. ...@@ -26,14 +26,14 @@ General.
""" """
def alive_bonus(env: RLTaskEnv) -> torch.Tensor: def is_alive(env: RLTaskEnv) -> torch.Tensor:
"""Reward for being alive.""" """Reward for being alive."""
return ~env.reset_buf * 1.0 return (~env.termination_manager.terminated).float()
def termination_penalty(env: RLTaskEnv) -> torch.Tensor: def is_terminated(env: RLTaskEnv) -> torch.Tensor:
"""Penalize terminated episodes that don't correspond to episodic timeouts.""" """Penalize terminated episodes that don't correspond to episodic timeouts."""
return env.reset_buf * (~env.termination_manager.time_outs) return env.termination_manager.terminated.float()
""" """
...@@ -177,6 +177,11 @@ def action_rate_l2(env: RLTaskEnv) -> torch.Tensor: ...@@ -177,6 +177,11 @@ def action_rate_l2(env: RLTaskEnv) -> torch.Tensor:
return torch.sum(torch.square(env.action_manager.action - env.action_manager.prev_action), dim=1) return torch.sum(torch.square(env.action_manager.action - env.action_manager.prev_action), dim=1)
def action_l2(env: RLTaskEnv) -> torch.Tensor:
"""Penalize the actions using L2-kernel."""
return torch.sum(torch.square(env.action_manager.action), dim=1)
""" """
Contact sensor. Contact sensor.
""" """
......
...@@ -54,8 +54,8 @@ class RigidBodyPropertiesCfg: ...@@ -54,8 +54,8 @@ class RigidBodyPropertiesCfg:
For more information on kinematic bodies, please refer to the `documentation <https://openusd.org/release/wp_rigid_body_physics.html#kinematic-bodies>`_. For more information on kinematic bodies, please refer to the `documentation <https://openusd.org/release/wp_rigid_body_physics.html#kinematic-bodies>`_.
""" """
disable_gravity: bool | None = False disable_gravity: bool | None = None
"""Disable gravity for the actor. Defaults to False.""" """Disable gravity for the actor."""
linear_damping: float | None = None linear_damping: float | None = None
"""Linear damping for the body.""" """Linear damping for the body."""
angular_damping: float | None = None angular_damping: float | None = None
......
...@@ -123,11 +123,14 @@ def apply_nested(func: Callable) -> Callable: ...@@ -123,11 +123,14 @@ def apply_nested(func: Callable) -> Callable:
The function iterates over the provided prim path and all its children to apply input function The function iterates over the provided prim path and all its children to apply input function
to all prims under the specified prim path. to all prims under the specified prim path.
Note: If the function succeeds to apply to a prim, it will not look at the children of that prim.
If the function succeeds to apply to a prim, it will not look at the children of that prim. This is based on the physics behavior that nested schemas are not allowed. For example, a parent prim
This is based on the physics behavior that nested schemas are not allowed. For example, a parent prim and its child prim cannot both have a rigid-body schema applied on them, or it is not possible to
and its child prim cannot both have a rigid-body schema applied on them, or it is not possible to have nested articulations.
have nested articulations.
While traversing the prims under the specified prim path, the function will throw a warning if it
does not succeed to apply the function to any prim. This is because the user may have intended to
apply the function to a prim that does not have valid attributes, or the prim may be an instanced prim.
Args: Args:
func: The function to apply to all prims under a specified prim-path. The function func: The function to apply to all prims under a specified prim-path. The function
...@@ -156,6 +159,9 @@ def apply_nested(func: Callable) -> Callable: ...@@ -156,6 +159,9 @@ def apply_nested(func: Callable) -> Callable:
# check if prim is valid # check if prim is valid
if not prim.IsValid(): if not prim.IsValid():
raise ValueError(f"Prim at path '{prim_path}' is not valid.") raise ValueError(f"Prim at path '{prim_path}' is not valid.")
# add iterable to check if property was applied on any of the prims
count_success = 0
instanced_prim_paths = []
# iterate over all prims under prim-path # iterate over all prims under prim-path
all_prims = [prim] all_prims = [prim]
while len(all_prims) > 0: while len(all_prims) > 0:
...@@ -163,10 +169,8 @@ def apply_nested(func: Callable) -> Callable: ...@@ -163,10 +169,8 @@ def apply_nested(func: Callable) -> Callable:
child_prim = all_prims.pop(0) child_prim = all_prims.pop(0)
child_prim_path = child_prim.GetPath().pathString # type: ignore child_prim_path = child_prim.GetPath().pathString # type: ignore
# check if prim is a prototype # check if prim is a prototype
# note: we prefer throwing a warning instead of ignoring the prim since the user may
# have intended to set properties on the prototype prim.
if child_prim.IsInstance(): if child_prim.IsInstance():
carb.log_warn(f"Cannot perform '{func.__name__}' on instanced prim: '{child_prim_path}'") instanced_prim_paths.append(child_prim_path)
continue continue
# set properties # set properties
success = func(child_prim_path, *args, **kwargs) success = func(child_prim_path, *args, **kwargs)
...@@ -174,6 +178,17 @@ def apply_nested(func: Callable) -> Callable: ...@@ -174,6 +178,17 @@ def apply_nested(func: Callable) -> Callable:
# this is based on the physics behavior that nested schemas are not allowed # this is based on the physics behavior that nested schemas are not allowed
if not success: if not success:
all_prims += child_prim.GetChildren() all_prims += child_prim.GetChildren()
else:
count_success += 1
# check if we were successful in applying the function to any prim
if count_success == 0:
carb.log_warn(
f"Could not perform '{func.__name__}' on any prims under: '{prim_path}'."
" This might be because of the following reasons:"
"\n\t(1) The desired attribute does not exist on any of the prims."
"\n\t(2) The desired attribute exists on an instanced prim."
f"\n\t\tDiscovered list of instanced prim paths: {instanced_prim_paths}"
)
return wrapper return wrapper
...@@ -208,7 +223,7 @@ def clone(func: Callable) -> Callable: ...@@ -208,7 +223,7 @@ def clone(func: Callable) -> Callable:
is_regex_expression = re.match(r"^[a-zA-Z0-9/_]+$", root_path) is None is_regex_expression = re.match(r"^[a-zA-Z0-9/_]+$", root_path) is None
# resolve matching prims for source prim path expression # resolve matching prims for source prim path expression
if is_regex_expression: if is_regex_expression and root_path != "":
source_prim_paths = prim_utils.find_matching_prim_paths(root_path) source_prim_paths = prim_utils.find_matching_prim_paths(root_path)
# if no matching prims are found, raise an error # if no matching prims are found, raise an error
if len(source_prim_paths) == 0: if len(source_prim_paths) == 0:
......
...@@ -12,8 +12,11 @@ For more information on Omniverse Nucleus: ...@@ -12,8 +12,11 @@ For more information on Omniverse Nucleus:
https://docs.omniverse.nvidia.com/app_isaacsim/app_isaacsim/overview.html#omniverse-nucleus https://docs.omniverse.nvidia.com/app_isaacsim/app_isaacsim/overview.html#omniverse-nucleus
""" """
from __future__ import annotations
import io import io
import os import os
import tempfile
from typing_extensions import Literal from typing_extensions import Literal
import carb import carb
...@@ -60,6 +63,53 @@ def check_file_path(path: str) -> Literal[0, 1, 2]: ...@@ -60,6 +63,53 @@ def check_file_path(path: str) -> Literal[0, 1, 2]:
return 0 return 0
def retrieve_file_path(path: str, download_dir: str | None = None, force_download: bool = True) -> str:
"""Retrieves the path to a file on the Nucleus Server or locally.
If the file exists locally, then the absolute path to the file is returned.
If the file exists on the Nucleus Server, then the file is downloaded to the local machine
and the absolute path to the file is returned.
Args:
path: The path to the file.
download_dir: The directory where the file should be downloaded. Defaults to None, in which
case the file is downloaded to the system's temporary directory.
force_download: Whether to force download the file from the Nucleus Server. This will overwrite
the local file if it exists. Defaults to True.
Returns:
The path to the file on the local machine.
Raises:
FileNotFoundError: When the file not found locally or on Nucleus Server.
RuntimeError: When the file cannot be copied from the Nucleus Server to the local machine. This
can happen when the file already exists locally and :attr:`force_download` is set to False.
"""
# check file status
file_status = check_file_path(path)
if file_status == 1:
return os.path.abspath(path)
elif file_status == 2:
# resolve download directory
if download_dir is None:
download_dir = tempfile.gettempdir()
else:
download_dir = os.path.abspath(download_dir)
# create download directory if it does not exist
if not os.path.exists(download_dir):
os.makedirs(download_dir)
# download file in temp directory using os
file_name = os.path.basename(omni.client.break_url(path).path)
target_path = os.path.join(download_dir, file_name)
# copy file to local machine
result = omni.client.copy(path, target_path)
if result != omni.client.Result.OK and not force_download:
raise RuntimeError(f"Unable to copy file: '{path}'. File already exists locally at: {target_path}")
return os.path.abspath(target_path)
else:
raise FileNotFoundError(f"Unable to find the file: {path}")
def read_file(path: str) -> io.BytesIO: def read_file(path: str) -> io.BytesIO:
"""Reads a file from the Nucleus Server or locally. """Reads a file from the Nucleus Server or locally.
......
...@@ -87,8 +87,8 @@ def wrap_to_pi(angles: torch.Tensor) -> torch.Tensor: ...@@ -87,8 +87,8 @@ def wrap_to_pi(angles: torch.Tensor) -> torch.Tensor:
Angles in the range [-pi, pi]. Angles in the range [-pi, pi].
""" """
angles = angles.clone() angles = angles.clone()
angles %= 2 * np.pi angles %= 2 * torch.pi
angles -= 2 * np.pi * (angles > np.pi) angles -= 2 * torch.pi * (angles > torch.pi)
return angles return angles
...@@ -418,14 +418,14 @@ def euler_xyz_from_quat(quat: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor, ...@@ -418,14 +418,14 @@ def euler_xyz_from_quat(quat: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor,
# pitch (y-axis rotation) # pitch (y-axis rotation)
sin_pitch = 2.0 * (q_w * q_y - q_z * q_x) sin_pitch = 2.0 * (q_w * q_y - q_z * q_x)
pitch = torch.where(torch.abs(sin_pitch) >= 1, copysign(np.pi / 2.0, sin_pitch), torch.asin(sin_pitch)) pitch = torch.where(torch.abs(sin_pitch) >= 1, copysign(torch.pi / 2.0, sin_pitch), torch.asin(sin_pitch))
# yaw (z-axis rotation) # yaw (z-axis rotation)
sin_yaw = 2.0 * (q_w * q_z + q_x * q_y) sin_yaw = 2.0 * (q_w * q_z + q_x * q_y)
cos_yaw = 1 - 2 * (q_y * q_y + q_z * q_z) cos_yaw = 1 - 2 * (q_y * q_y + q_z * q_z)
yaw = torch.atan2(sin_yaw, cos_yaw) yaw = torch.atan2(sin_yaw, cos_yaw)
return roll % (2 * np.pi), pitch % (2 * np.pi), yaw % (2 * np.pi) # TODO: why not wrap_to_pi here ? return roll % (2 * torch.pi), pitch % (2 * torch.pi), yaw % (2 * torch.pi) # TODO: why not wrap_to_pi here ?
@torch.jit.script @torch.jit.script
...@@ -969,7 +969,7 @@ def random_yaw_orientation(num: int, device: str) -> torch.Tensor: ...@@ -969,7 +969,7 @@ def random_yaw_orientation(num: int, device: str) -> torch.Tensor:
""" """
roll = torch.zeros(num, dtype=torch.float, device=device) roll = torch.zeros(num, dtype=torch.float, device=device)
pitch = torch.zeros(num, dtype=torch.float, device=device) pitch = torch.zeros(num, dtype=torch.float, device=device)
yaw = 2 * np.pi * torch.rand(num, dtype=torch.float, device=device) yaw = 2 * torch.pi * torch.rand(num, dtype=torch.float, device=device)
return quat_from_euler_xyz(roll, pitch, yaw) return quat_from_euler_xyz(roll, pitch, yaw)
...@@ -1041,7 +1041,7 @@ def sample_cylinder( ...@@ -1041,7 +1041,7 @@ def sample_cylinder(
Sampled tensor of shape :obj:`(*size, 3)`. Sampled tensor of shape :obj:`(*size, 3)`.
""" """
# sample angles # sample angles
angles = (torch.rand(size, device=device) * 2 - 1) * np.pi angles = (torch.rand(size, device=device) * 2 - 1) * torch.pi
h_min, h_max = h_range h_min, h_max = h_range
# add shape # add shape
if isinstance(size, int): if isinstance(size, int):
......
...@@ -49,6 +49,6 @@ __version__ = ORBIT_TASKS_METADATA["package"]["version"] ...@@ -49,6 +49,6 @@ __version__ = ORBIT_TASKS_METADATA["package"]["version"]
from .utils import import_packages from .utils import import_packages
# The blacklist is used to prevent importing configs from sub-packages # The blacklist is used to prevent importing configs from sub-packages
_BLACKLIST_PKGS = ["locomotion.velocity.config.anymal_d", "utils", "classic.ant", "classic.humanoid"] _BLACKLIST_PKGS = ["locomotion.velocity.config.anymal_d", "utils"]
# Import all configs in this package # Import all configs in this package
import_packages(__name__, _BLACKLIST_PKGS) import_packages(__name__, _BLACKLIST_PKGS)
...@@ -7,21 +7,23 @@ ...@@ -7,21 +7,23 @@
Ant locomotion environment (similar to OpenAI Gym Ant-v2). Ant locomotion environment (similar to OpenAI Gym Ant-v2).
""" """
# import gymnasium as gym import gymnasium as gym
# from . import agents from . import agents, ant_env_cfg
## ##
# Register Gym environments. # Register Gym environments.
## ##
# gym.register( gym.register(
# id="Isaac-Ant-v0", id="Isaac-Ant-v0",
# entry_point="omni.isaac.orbit.envs:RLTaskEnv", entry_point="omni.isaac.orbit.envs:RLTaskEnv",
# kwargs={ disable_env_checker=True,
# "env_cfg_entry_point": f"{__name__}:ant_env_cfg.yaml", kwargs={
# "rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_ppo_cfg.yaml", "env_cfg_entry_point": ant_env_cfg.AntEnvCfg,
# "skrl_cfg_entry_point": f"{agents.__name__}:skrl_ppo_cfg.yaml", "rsl_rl_cfg_entry_point": agents.rsl_rl_ppo_cfg.AntPPORunnerCfg,
# "sb3_cfg_entry_point": f"{agents.__name__}:sb3_ppo_cfg.yaml", "rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_ppo_cfg.yaml",
# }, "skrl_cfg_entry_point": f"{agents.__name__}:skrl_ppo_cfg.yaml",
# ) "sb3_cfg_entry_point": f"{agents.__name__}:sb3_ppo_cfg.yaml",
},
)
# Copyright (c) 2022-2023, The ORBIT Project Developers.
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
from . import rsl_rl_ppo_cfg
...@@ -45,13 +45,13 @@ params: ...@@ -45,13 +45,13 @@ params:
device_name: 'cuda:0' device_name: 'cuda:0'
multi_gpu: False multi_gpu: False
ppo: True ppo: True
mixed_precision: False mixed_precision: True
normalize_input: True normalize_input: True
normalize_value: True normalize_value: True
value_bootstrap: False value_bootstrap: True
num_actors: -1 num_actors: -1
reward_shaper: reward_shaper:
scale_value: 0.01 scale_value: 0.6
normalize_advantage: True normalize_advantage: True
gamma: 0.99 gamma: 0.99
tau: 0.95 tau: 0.95
...@@ -61,16 +61,16 @@ params: ...@@ -61,16 +61,16 @@ params:
kl_threshold: 0.008 kl_threshold: 0.008
score_to_win: 20000 score_to_win: 20000
max_epochs: 500 max_epochs: 500
save_best_after: 200 save_best_after: 100
save_frequency: 50 save_frequency: 50
grad_norm: 1.0 grad_norm: 1.0
entropy_coef: 0.0 entropy_coef: 0.0
truncate_grads: True truncate_grads: True
e_clip: 0.2 e_clip: 0.2
horizon_length: 16 horizon_length: 16
minibatch_size: 4096 # 32768 minibatch_size: 32768
mini_epochs: 8 mini_epochs: 4
critic_coef: 2 critic_coef: 2
clip_value: True clip_value: True
seq_len: 4 seq_length: 4
bounds_loss_coef: 0.0001 bounds_loss_coef: 0.0001
# Copyright (c) 2022-2023, The ORBIT Project Developers.
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
from omni.isaac.orbit.utils import configclass
from omni.isaac.orbit_tasks.utils.wrappers.rsl_rl import (
RslRlOnPolicyRunnerCfg,
RslRlPpoActorCriticCfg,
RslRlPpoAlgorithmCfg,
)
@configclass
class AntPPORunnerCfg(RslRlOnPolicyRunnerCfg):
num_steps_per_env = 32
max_iterations = 1000
save_interval = 50
experiment_name = "ant"
empirical_normalization = False
policy = RslRlPpoActorCriticCfg(
init_noise_std=1.0,
actor_hidden_dims=[400, 200, 100],
critic_hidden_dims=[400, 200, 100],
activation="elu",
)
algorithm = RslRlPpoAlgorithmCfg(
value_loss_coef=1.0,
use_clipped_value_loss=True,
clip_param=0.2,
entropy_coef=0.0,
num_learning_epochs=5,
num_mini_batches=4,
learning_rate=5.0e-4,
schedule="adaptive",
gamma=0.99,
lam=0.95,
desired_kl=0.01,
max_grad_norm=1.0,
)
# Reference: https://github.com/DLR-RM/rl-baselines3-zoo/blob/master/hyperparams/ppo.yml#L161 # Reference: https://github.com/DLR-RM/rl-baselines3-zoo/blob/master/hyperparams/ppo.yml#L161
seed: 42 seed: 42
n_timesteps: !!float 2e6 n_timesteps: !!float 1e7
policy: 'MlpPolicy' policy: 'MlpPolicy'
batch_size: 128 batch_size: 128
n_steps: 512 n_steps: 512
...@@ -16,8 +16,8 @@ learning_rate: !!float 3e-5 ...@@ -16,8 +16,8 @@ learning_rate: !!float 3e-5
use_sde: True use_sde: True
clip_range: 0.4 clip_range: 0.4
policy_kwargs: "dict( policy_kwargs: "dict(
log_std_init=-2, log_std_init=-1,
ortho_init=False, ortho_init=False,
activation_fn=nn.ReLU, activation_fn=nn.ReLU,
net_arch=[dict(pi=[256, 256], vf=[256, 256])] net_arch=dict(pi=[256, 256], vf=[256, 256])
)" )"
# parameters for the MDP
env:
# general
num_envs: 1024
env_spacing: 5
episode_length: 1000
# step parameters
control_frequency_inv: 2 # 60 Hz
# actions parameters
power_scale: 0.5
# observations parameters
angular_velocity_scale: 1.0
dof_velocity_scale: 0.2
contact_force_scale: 0.1
# reward parameters
heading_weight: 0.5
up_weight: 0.1
alive_weight: 0.5
# cost parameters
actions_cost: 0.005
energy_cost: 0.05
joints_at_limit_cost: 0.1
# termination parameters
death_cost: -2.0
termination_height: 0.31
# parameters for setting up the scene
scene:
ant:
# articulation
solver_position_iteration_count: 4
solver_velocity_iteration_count: 0
sleep_threshold: 0.005
stabilization_threshold: 0.001
enable_self_collisions: True
# per-body
enable_gyroscopic_forces: True
max_depenetration_velocity: 10.0
# per-shape
contact_offset: 0.02
rest_offset: 0.0
# parameters for physics engine
sim:
dt: 0.0083 # 1/120 s
substeps: 1
gravity: [0.0, 0.0, -9.81]
enable_scene_query_support: False
use_gpu_pipeline: True
use_flatcache: True # deprecated from Isaac Sim 2023.1 onwards
use_fabric: True # used from Isaac Sim 2023.1 onwards
device: "cuda:0"
physx:
solver_type: 1
use_gpu: True # set to False to run on CPU
bounce_threshold_velocity: 0.2
friction_offset_threshold: 0.04
friction_correlation_distance: 0.025
enable_stabilization: True
# GPU buffers
gpu_max_rigid_contact_count: 524288
gpu_max_rigid_patch_count: 81920
gpu_found_lost_pairs_capacity: 1024
gpu_found_lost_aggregate_pairs_capacity: 262144
gpu_total_aggregate_pairs_capacity: 2048
gpu_max_soft_body_contacts: 1048576
gpu_max_particle_contacts: 1048576
gpu_heap_capacity: 67108864
gpu_temp_buffer_capacity: 16777216
gpu_max_num_partitions: 8
# Copyright (c) 2022-2023, The ORBIT Project Developers.
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
from __future__ import annotations
import omni.isaac.orbit.sim as sim_utils
from omni.isaac.orbit.actuators import ImplicitActuatorCfg
from omni.isaac.orbit.assets import ArticulationCfg, AssetBaseCfg
from omni.isaac.orbit.command_generators import NullCommandGeneratorCfg
from omni.isaac.orbit.envs import RLTaskEnvCfg
from omni.isaac.orbit.managers import ObservationGroupCfg as ObsGroup
from omni.isaac.orbit.managers import ObservationTermCfg as ObsTerm
from omni.isaac.orbit.managers import RandomizationTermCfg as RandTerm
from omni.isaac.orbit.managers import RewardTermCfg as RewTerm
from omni.isaac.orbit.managers import SceneEntityCfg
from omni.isaac.orbit.managers import TerminationTermCfg as DoneTerm
from omni.isaac.orbit.scene import InteractiveSceneCfg
from omni.isaac.orbit.terrains import TerrainImporterCfg
from omni.isaac.orbit.utils import configclass
from omni.isaac.orbit.utils.assets import ISAAC_NUCLEUS_DIR
import omni.isaac.orbit_tasks.classic.humanoid.mdp as mdp
##
# Scene definition
##
@configclass
class MySceneCfg(InteractiveSceneCfg):
"""Configuration for the terrain scene with an ant robot."""
# terrain
terrain = TerrainImporterCfg(
prim_path="/World/ground",
terrain_type="plane",
collision_group=-1,
physics_material=sim_utils.RigidBodyMaterialCfg(
friction_combine_mode="average",
restitution_combine_mode="average",
static_friction=1.0,
dynamic_friction=1.0,
restitution=0.0,
),
debug_vis=False,
)
# robot
robot = ArticulationCfg(
prim_path="{ENV_REGEX_NS}/Robot",
spawn=sim_utils.UsdFileCfg(
usd_path=f"{ISAAC_NUCLEUS_DIR}/Robots/Ant/ant_instanceable.usd",
rigid_props=sim_utils.RigidBodyPropertiesCfg(
disable_gravity=False,
max_depenetration_velocity=10.0,
enable_gyroscopic_forces=True,
),
articulation_props=sim_utils.ArticulationRootPropertiesCfg(
enabled_self_collisions=False,
solver_position_iteration_count=4,
solver_velocity_iteration_count=0,
sleep_threshold=0.005,
stabilization_threshold=0.001,
),
copy_from_source=False,
),
init_state=ArticulationCfg.InitialStateCfg(
pos=(0.0, 0.0, 0.5),
joint_pos={".*": 0.0},
),
actuators={
"body": ImplicitActuatorCfg(
joint_names_expr=[".*"],
stiffness=0.0,
damping=0.0,
),
},
)
# lights
light = AssetBaseCfg(
prim_path="/World/light",
spawn=sim_utils.DistantLightCfg(color=(0.75, 0.75, 0.75), intensity=3000.0),
)
@configclass
class ActionsCfg:
"""Action specifications for the MDP."""
joint_effort = mdp.JointEffortActionCfg(asset_name="robot", joint_names=[".*"], scale=7.5)
@configclass
class ObservationsCfg:
"""Observation specifications for the MDP."""
@configclass
class PolicyCfg(ObsGroup):
"""Observations for the policy."""
base_height = ObsTerm(func=mdp.base_pos_z)
base_lin_vel = ObsTerm(func=mdp.base_lin_vel)
base_ang_vel = ObsTerm(func=mdp.base_ang_vel)
base_yaw_roll = ObsTerm(func=mdp.base_yaw_roll)
base_angle_to_target = ObsTerm(func=mdp.base_angle_to_target, params={"target_pos": (1000.0, 0.0, 0.0)})
base_up_proj = ObsTerm(func=mdp.base_up_proj)
base_heading_proj = ObsTerm(func=mdp.base_heading_proj, params={"target_pos": (1000.0, 0.0, 0.0)})
joint_pos_norm = ObsTerm(func=mdp.joint_pos_norm)
joint_vel_rel = ObsTerm(func=mdp.joint_vel_rel, scale=0.2)
feet_body_forces = ObsTerm(
func=mdp.body_incoming_wrench,
scale=0.1,
params={
"asset_cfg": SceneEntityCfg(
"robot", body_names=["front_left_foot", "front_right_foot", "left_back_foot", "right_back_foot"]
)
},
)
actions = ObsTerm(func=mdp.last_action)
def __post_init__(self):
self.enable_corruption = False
self.concatenate_terms = True
# observation groups
policy: PolicyCfg = PolicyCfg()
@configclass
class RandomizationCfg:
"""Configuration for randomization."""
reset_base = RandTerm(
func=mdp.reset_root_state_uniform,
mode="reset",
params={"pose_range": {}, "velocity_range": {}},
)
reset_robot_joints = RandTerm(
func=mdp.reset_joints_by_offset,
mode="reset",
params={
"position_range": (-0.2, 0.2),
"velocity_range": (-0.1, 0.1),
},
)
@configclass
class RewardsCfg:
"""Reward terms for the MDP."""
# (1) Reward for moving forward
progress = RewTerm(func=mdp.progress_reward, weight=1.0, params={"target_pos": (1000.0, 0.0, 0.0)})
# (2) Stay alive bonus
alive = RewTerm(func=mdp.is_alive, weight=0.5)
# (3) Reward for non-upright posture
upright = RewTerm(func=mdp.upright_posture_bonus, weight=0.1, params={"threshold": 0.93})
# (4) Reward for moving in the right direction
move_to_target = RewTerm(
func=mdp.move_to_target_bonus, weight=0.5, params={"threshold": 0.8, "target_pos": (1000.0, 0.0, 0.0)}
)
# (5) Penalty for large action commands
action_l2 = RewTerm(func=mdp.action_l2, weight=-0.005)
# (6) Penalty for energy consumption
energy = RewTerm(func=mdp.power_consumption, weight=-0.05, params={"gear_ratio": {".*": 15.0}})
# (7) Penalty for reaching close to joint limits
joint_limits = RewTerm(
func=mdp.joint_limits_penalty_ratio, weight=-0.1, params={"threshold": 0.99, "gear_ratio": {".*": 15.0}}
)
@configclass
class TerminationsCfg:
"""Termination terms for the MDP."""
# (1) Terminate if the episode length is exceeded
time_out = DoneTerm(func=mdp.time_out, time_out=True)
# (2) Terminate if the robot falls
torso_height = DoneTerm(func=mdp.base_height, params={"minimum_height": 0.31})
@configclass
class CurriculumCfg:
"""Curriculum terms for the MDP."""
pass
@configclass
class AntEnvCfg(RLTaskEnvCfg):
"""Configuration for the MuJoCo-style Ant walking environment."""
# Scene settings
scene: MySceneCfg = MySceneCfg(num_envs=4096, env_spacing=5.0)
# Basic settings
observations: ObservationsCfg = ObservationsCfg()
actions: ActionsCfg = ActionsCfg()
commands: NullCommandGeneratorCfg = NullCommandGeneratorCfg()
# MDP settings
rewards: RewardsCfg = RewardsCfg()
terminations: TerminationsCfg = TerminationsCfg()
randomization: RandomizationCfg = RandomizationCfg()
curriculum: CurriculumCfg = CurriculumCfg()
def __post_init__(self):
"""Post initialization."""
# general settings
self.decimation = 2
self.episode_length_s = 16.0
# simulation settings
self.sim.dt = 1 / 120.0
self.sim.physx.bounce_threshold_velocity = 0.2
# default friction material
self.sim.physics_material.static_friction = 1.0
self.sim.physics_material.dynamic_friction = 1.0
self.sim.physics_material.restitution = 0.0
...@@ -148,9 +148,9 @@ class RewardsCfg: ...@@ -148,9 +148,9 @@ class RewardsCfg:
"""Reward terms for the MDP.""" """Reward terms for the MDP."""
# (1) Constant running reward # (1) Constant running reward
alive = RewTerm(func=mdp.alive_bonus, weight=1.0) alive = RewTerm(func=mdp.is_alive, weight=1.0)
# (2) Failure penalty # (2) Failure penalty
terminating = RewTerm(func=mdp.termination_penalty, weight=-2.0) terminating = RewTerm(func=mdp.is_terminated, weight=-2.0)
# (3) Primary task: keep pole upright # (3) Primary task: keep pole upright
pole_pos = RewTerm( pole_pos = RewTerm(
func=mdp.joint_pos_target_l2, func=mdp.joint_pos_target_l2,
......
...@@ -7,21 +7,23 @@ ...@@ -7,21 +7,23 @@
Humanoid locomotion environment (similar to OpenAI Gym Humanoid-v2). Humanoid locomotion environment (similar to OpenAI Gym Humanoid-v2).
""" """
# import gymnasium as gym import gymnasium as gym
# from . import agents from . import agents, humanoid_env_cfg
## ##
# Register Gym environments. # Register Gym environments.
## ##
# gym.register( gym.register(
# id="Isaac-Humanoid-v0", id="Isaac-Humanoid-v0",
# entry_point="omni.isaac.orbit.envs:RLTaskEnv", entry_point="omni.isaac.orbit.envs:RLTaskEnv",
# kwargs={ disable_env_checker=True,
# "env_cfg_entry_point": f"{__name__}:ant_env_cfg.yaml", kwargs={
# "rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_ppo_cfg.yaml", "env_cfg_entry_point": humanoid_env_cfg.HumanoidEnvCfg,
# "skrl_cfg_entry_point": f"{agents.__name__}:skrl_ppo_cfg.yaml", "rsl_rl_cfg_entry_point": agents.rsl_rl_ppo_cfg.HumanoidPPORunnerCfg,
# "sb3_cfg_entry_point": f"{agents.__name__}:sb3_ppo_cfg.yaml", "rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_ppo_cfg.yaml",
# }, "skrl_cfg_entry_point": f"{agents.__name__}:skrl_ppo_cfg.yaml",
# ) "sb3_cfg_entry_point": f"{agents.__name__}:sb3_ppo_cfg.yaml",
},
)
# Copyright (c) 2022-2023, The ORBIT Project Developers.
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
from . import rsl_rl_ppo_cfg # noqa: F401, F403
...@@ -45,32 +45,31 @@ params: ...@@ -45,32 +45,31 @@ params:
device_name: 'cuda:0' device_name: 'cuda:0'
multi_gpu: False multi_gpu: False
ppo: True ppo: True
mixed_precision: False mixed_precision: True
normalize_input: True normalize_input: True
normalize_value: True normalize_value: True
value_bootstrap: False value_bootstrap: True
num_actors: -1 num_actors: -1
reward_shaper: reward_shaper:
scale_value: 0.01 scale_value: 0.6
normalize_advantage: True normalize_advantage: True
gamma: 0.99 gamma: 0.99
tau: 0.95 tau: 0.95
learning_rate: 3e-4 learning_rate: 5e-4
lr_schedule: adaptive lr_schedule: adaptive
schedule_type: legacy kl_threshold: 0.01
kl_threshold: 0.008
score_to_win: 20000 score_to_win: 20000
max_epochs: 500 max_epochs: 1000
save_best_after: 500 save_best_after: 100
save_frequency: 100 save_frequency: 100
grad_norm: 1.0 grad_norm: 1.0
entropy_coef: 0.0 entropy_coef: 0.0
truncate_grads: True truncate_grads: True
e_clip: 0.2 e_clip: 0.2
horizon_length: 32 horizon_length: 32
minibatch_size: 4096 # 32768 minibatch_size: 32768
mini_epochs: 8 mini_epochs: 5
critic_coef: 2 critic_coef: 4
clip_value: True clip_value: True
seq_len: 4 seq_length: 4
bounds_loss_coef: 0.0001 bounds_loss_coef: 0.0001
# Copyright (c) 2022-2023, The ORBIT Project Developers.
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
from omni.isaac.orbit.utils import configclass
from omni.isaac.orbit_tasks.utils.wrappers.rsl_rl import (
RslRlOnPolicyRunnerCfg,
RslRlPpoActorCriticCfg,
RslRlPpoAlgorithmCfg,
)
@configclass
class HumanoidPPORunnerCfg(RslRlOnPolicyRunnerCfg):
num_steps_per_env = 32
max_iterations = 1000
save_interval = 50
experiment_name = "humanoid"
empirical_normalization = False
policy = RslRlPpoActorCriticCfg(
init_noise_std=1.0,
actor_hidden_dims=[400, 200, 100],
critic_hidden_dims=[400, 200, 100],
activation="elu",
)
algorithm = RslRlPpoAlgorithmCfg(
value_loss_coef=1.0,
use_clipped_value_loss=True,
clip_param=0.2,
entropy_coef=0.0,
num_learning_epochs=5,
num_mini_batches=4,
learning_rate=5.0e-4,
schedule="adaptive",
gamma=0.99,
lam=0.95,
desired_kl=0.01,
max_grad_norm=1.0,
)
# Reference: https://github.com/DLR-RM/rl-baselines3-zoo/blob/master/hyperparams/ppo.yml#L457 # Reference: https://github.com/DLR-RM/rl-baselines3-zoo/blob/master/hyperparams/ppo.yml#L245
seed: 42 seed: 42
policy: 'MlpPolicy' policy: 'MlpPolicy'
n_timesteps: !!float 1e7 n_timesteps: !!float 5e7
batch_size: 256 batch_size: 256
n_steps: 512 n_steps: 512
gamma: 0.95 gamma: 0.99
learning_rate: 3.56987e-05 learning_rate: !!float 2.5e-4
ent_coef: 0.00238306 ent_coef: 0.0
clip_range: 0.3 clip_range: 0.2
n_epochs: 5 n_epochs: 10
gae_lambda: 0.9 gae_lambda: 0.95
max_grad_norm: 2 max_grad_norm: 1.0
vf_coef: 0.431892 vf_coef: 0.5
policy_kwargs: "dict( policy_kwargs: "dict(
log_std_init=-2, log_std_init=-2,
ortho_init=False, ortho_init=False,
activation_fn=nn.ReLU, activation_fn=nn.ReLU,
net_arch=[dict(pi=[256, 256], vf=[256, 256])] net_arch=dict(pi=[256, 256], vf=[256, 256])
)" )"
# parameters for the MDP
env:
# general
num_envs: 1024
env_spacing: 5
episode_length: 1000
# step parameters
control_frequency_inv: 2 # 60 Hz
# actions parameters
power_scale: 1.0
# observations parameters
angular_velocity_scale: 0.25
dof_velocity_scale: 0.1
contact_force_scale: 0.01
# reward parameters
heading_weight: 0.5
up_weight: 0.1
alive_weight: 2.0
# cost parameters
actions_cost: 0.01
energy_cost: 0.05
joints_at_limit_cost: 0.25
# termination parameters
death_cost: -1.0
termination_height: 0.8
# parameters for setting up the scene
scene:
humanoid:
# articulation
solver_position_iteration_count: 4
solver_velocity_iteration_count: 0
sleep_threshold: 0.005
stabilization_threshold: 0.001
enable_self_collisions: True
# per-body
enable_gyroscopic_forces: True
max_depenetration_velocity: 10.0
# per-shape
contact_offset: 0.02
rest_offset: 0.0
# parameters for physics engine
sim:
dt: 0.0083 # 1/120 s
substeps: 1
gravity: [0.0, 0.0, -9.81]
enable_scene_query_support: False
use_gpu_pipeline: True
use_flatcache: True # deprecated from Isaac Sim 2023.1 onwards
use_fabric: True # used from Isaac Sim 2023.1 onwards
device: "cuda:0"
physx:
solver_type: 1
use_gpu: True # set to False to run on CPU
bounce_threshold_velocity: 0.2
friction_offset_threshold: 0.04
friction_correlation_distance: 0.025
enable_stabilization: True
# GPU buffers
gpu_max_rigid_contact_count: 524288
gpu_max_rigid_patch_count: 81920
gpu_found_lost_pairs_capacity: 1024
gpu_found_lost_aggregate_pairs_capacity: 262144
gpu_total_aggregate_pairs_capacity: 2048
gpu_max_soft_body_contacts: 1048576
gpu_max_particle_contacts: 1048576
gpu_heap_capacity: 67108864
gpu_temp_buffer_capacity: 16777216
gpu_max_num_partitions: 8
# Copyright (c) 2022-2023, The ORBIT Project Developers.
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
from __future__ import annotations
import omni.isaac.orbit.sim as sim_utils
from omni.isaac.orbit.actuators import ImplicitActuatorCfg
from omni.isaac.orbit.assets import ArticulationCfg, AssetBaseCfg
from omni.isaac.orbit.command_generators import NullCommandGeneratorCfg
from omni.isaac.orbit.envs import RLTaskEnvCfg
from omni.isaac.orbit.managers import ObservationGroupCfg as ObsGroup
from omni.isaac.orbit.managers import ObservationTermCfg as ObsTerm
from omni.isaac.orbit.managers import RandomizationTermCfg as RandTerm
from omni.isaac.orbit.managers import RewardTermCfg as RewTerm
from omni.isaac.orbit.managers import SceneEntityCfg
from omni.isaac.orbit.managers import TerminationTermCfg as DoneTerm
from omni.isaac.orbit.scene import InteractiveSceneCfg
from omni.isaac.orbit.terrains import TerrainImporterCfg
from omni.isaac.orbit.utils import configclass
from omni.isaac.orbit.utils.assets import ISAAC_NUCLEUS_DIR
import omni.isaac.orbit_tasks.classic.humanoid.mdp as mdp
##
# Scene definition
##
@configclass
class MySceneCfg(InteractiveSceneCfg):
"""Configuration for the terrain scene with a humanoid robot."""
# terrain
terrain = TerrainImporterCfg(
prim_path="/World/ground",
terrain_type="plane",
collision_group=-1,
physics_material=sim_utils.RigidBodyMaterialCfg(static_friction=1.0, dynamic_friction=1.0, restitution=0.0),
debug_vis=False,
)
# robot
robot = ArticulationCfg(
prim_path="{ENV_REGEX_NS}/Robot",
spawn=sim_utils.UsdFileCfg(
usd_path=f"{ISAAC_NUCLEUS_DIR}/Robots/Humanoid/humanoid_instanceable.usd",
rigid_props=sim_utils.RigidBodyPropertiesCfg(
disable_gravity=None,
max_depenetration_velocity=10.0,
enable_gyroscopic_forces=True,
),
articulation_props=sim_utils.ArticulationRootPropertiesCfg(
enabled_self_collisions=True,
solver_position_iteration_count=4,
solver_velocity_iteration_count=0,
sleep_threshold=0.005,
stabilization_threshold=0.001,
),
copy_from_source=False,
),
init_state=ArticulationCfg.InitialStateCfg(
pos=(0.0, 0.0, 1.34),
joint_pos={".*": 0.0},
),
actuators={
"body": ImplicitActuatorCfg(
joint_names_expr=[".*"],
stiffness={
".*_waist.*": 20.0,
".*_upper_arm.*": 10.0,
"pelvis": 10.0,
".*_lower_arm": 2.0,
".*_thigh:0": 10.0,
".*_thigh:1": 20.0,
".*_thigh:2": 10.0,
".*_shin": 5.0,
".*_foot.*": 2.0,
},
damping={
".*_waist.*": 5.0,
".*_upper_arm.*": 5.0,
"pelvis": 5.0,
".*_lower_arm": 1.0,
".*_thigh:0": 5.0,
".*_thigh:1": 5.0,
".*_thigh:2": 5.0,
".*_shin": 0.1,
".*_foot.*": 1.0,
},
),
},
)
# lights
light = AssetBaseCfg(
prim_path="/World/light",
spawn=sim_utils.DistantLightCfg(color=(0.75, 0.75, 0.75), intensity=3000.0),
)
@configclass
class ActionsCfg:
"""Action specifications for the MDP."""
joint_effort = mdp.JointEffortActionCfg(
asset_name="robot",
joint_names=[".*"],
scale={
".*_waist.*": 67.5,
".*_upper_arm.*": 67.5,
"pelvis": 67.5,
".*_lower_arm": 45.0,
".*_thigh:0": 45.0,
".*_thigh:1": 135.0,
".*_thigh:2": 45.0,
".*_shin": 90.0,
".*_foot.*": 22.5,
},
)
@configclass
class ObservationsCfg:
"""Observation specifications for the MDP."""
@configclass
class PolicyCfg(ObsGroup):
"""Observations for the policy."""
base_height = ObsTerm(func=mdp.base_pos_z)
base_lin_vel = ObsTerm(func=mdp.base_lin_vel)
base_ang_vel = ObsTerm(func=mdp.base_ang_vel, scale=0.25)
base_yaw_roll = ObsTerm(func=mdp.base_yaw_roll)
base_angle_to_target = ObsTerm(func=mdp.base_angle_to_target, params={"target_pos": (1000.0, 0.0, 0.0)})
base_up_proj = ObsTerm(func=mdp.base_up_proj)
base_heading_proj = ObsTerm(func=mdp.base_heading_proj, params={"target_pos": (1000.0, 0.0, 0.0)})
joint_pos_norm = ObsTerm(func=mdp.joint_pos_norm)
joint_vel_rel = ObsTerm(func=mdp.joint_vel_rel, scale=0.1)
feet_body_forces = ObsTerm(
func=mdp.body_incoming_wrench,
scale=0.01,
params={"asset_cfg": SceneEntityCfg("robot", body_names=["left_foot", "right_foot"])},
)
actions = ObsTerm(func=mdp.last_action)
def __post_init__(self):
self.enable_corruption = False
self.concatenate_terms = True
# observation groups
policy: PolicyCfg = PolicyCfg()
@configclass
class RandomizationCfg:
"""Configuration for randomization."""
reset_base = RandTerm(
func=mdp.reset_root_state_uniform,
mode="reset",
params={"pose_range": {}, "velocity_range": {}},
)
reset_robot_joints = RandTerm(
func=mdp.reset_joints_by_offset,
mode="reset",
params={
"position_range": (-0.2, 0.2),
"velocity_range": (-0.1, 0.1),
},
)
@configclass
class RewardsCfg:
"""Reward terms for the MDP."""
# (1) Reward for moving forward
progress = RewTerm(func=mdp.progress_reward, weight=1.0, params={"target_pos": (1000.0, 0.0, 0.0)})
# (2) Stay alive bonus
alive = RewTerm(func=mdp.is_alive, weight=2.0)
# (3) Reward for non-upright posture
upright = RewTerm(func=mdp.upright_posture_bonus, weight=0.1, params={"threshold": 0.93})
# (4) Reward for moving in the right direction
move_to_target = RewTerm(
func=mdp.move_to_target_bonus, weight=0.5, params={"threshold": 0.8, "target_pos": (1000.0, 0.0, 0.0)}
)
# (5) Penalty for large action commands
action_l2 = RewTerm(func=mdp.action_l2, weight=-0.01)
# (6) Penalty for energy consumption
energy = RewTerm(
func=mdp.power_consumption,
weight=-0.005,
params={
"gear_ratio": {
".*_waist.*": 67.5,
".*_upper_arm.*": 67.5,
"pelvis": 67.5,
".*_lower_arm": 45.0,
".*_thigh:0": 45.0,
".*_thigh:1": 135.0,
".*_thigh:2": 45.0,
".*_shin": 90.0,
".*_foot.*": 22.5,
}
},
)
# (7) Penalty for reaching close to joint limits
joint_limits = RewTerm(
func=mdp.joint_limits_penalty_ratio,
weight=-0.25,
params={
"threshold": 0.98,
"gear_ratio": {
".*_waist.*": 67.5,
".*_upper_arm.*": 67.5,
"pelvis": 67.5,
".*_lower_arm": 45.0,
".*_thigh:0": 45.0,
".*_thigh:1": 135.0,
".*_thigh:2": 45.0,
".*_shin": 90.0,
".*_foot.*": 22.5,
},
},
)
@configclass
class TerminationsCfg:
"""Termination terms for the MDP."""
# (1) Terminate if the episode length is exceeded
time_out = DoneTerm(func=mdp.time_out, time_out=True)
# (2) Terminate if the robot falls
torso_height = DoneTerm(func=mdp.base_height, params={"minimum_height": 0.8})
@configclass
class CurriculumCfg:
"""Curriculum terms for the MDP."""
pass
@configclass
class HumanoidEnvCfg(RLTaskEnvCfg):
"""Configuration for the MuJoCo-style Humanoid walking environment."""
# Scene settings
scene: MySceneCfg = MySceneCfg(num_envs=4096, env_spacing=5.0)
# Basic settings
observations: ObservationsCfg = ObservationsCfg()
actions: ActionsCfg = ActionsCfg()
commands: NullCommandGeneratorCfg = NullCommandGeneratorCfg()
# MDP settings
rewards: RewardsCfg = RewardsCfg()
terminations: TerminationsCfg = TerminationsCfg()
randomization: RandomizationCfg = RandomizationCfg()
curriculum: CurriculumCfg = CurriculumCfg()
def __post_init__(self):
"""Post initialization."""
# general settings
self.decimation = 2
self.episode_length_s = 16.0
# simulation settings
self.sim.dt = 1 / 120.0
self.sim.physx.bounce_threshold_velocity = 0.2
# default friction material
self.sim.physics_material.static_friction = 1.0
self.sim.physics_material.dynamic_friction = 1.0
self.sim.physics_material.restitution = 0.0
# Copyright (c) 2022-2023, The ORBIT Project Developers.
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
"""This sub-module contains the functions that are specific to the humanoid environment."""
from omni.isaac.orbit.envs.mdp import * # noqa: F401, F403
from .observations import *
from .rewards import *
# Copyright (c) 2022-2023, The ORBIT Project Developers.
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
from __future__ import annotations
import torch
from typing import TYPE_CHECKING
import omni.isaac.orbit.utils.math as math_utils
from omni.isaac.orbit.assets import Articulation
from omni.isaac.orbit.managers import SceneEntityCfg
if TYPE_CHECKING:
from ..humanoid_env import HumanoidEnv
def base_yaw_roll(env: HumanoidEnv, asset_cfg: SceneEntityCfg = SceneEntityCfg("robot")) -> torch.Tensor:
"""Yaw and roll of the base in the simulation world frame."""
# extract the used quantities (to enable type-hinting)
asset: Articulation = env.scene[asset_cfg.name]
# extract euler angles (in world frame)
roll, _, yaw = math_utils.euler_xyz_from_quat(asset.data.root_quat_w)
# normalize angle to [-pi, pi]
roll = torch.atan2(torch.sin(roll), torch.cos(roll))
yaw = torch.atan2(torch.sin(yaw), torch.cos(yaw))
return torch.cat((yaw.unsqueeze(-1), roll.unsqueeze(-1)), dim=-1)
def base_up_proj(env: HumanoidEnv, asset_cfg: SceneEntityCfg = SceneEntityCfg("robot")) -> torch.Tensor:
"""Projection of the base up vector onto the world up vector."""
# extract the used quantities (to enable type-hinting)
asset: Articulation = env.scene[asset_cfg.name]
# compute base up vector
base_up_vec = math_utils.quat_rotate(asset.data.root_quat_w, -asset._GRAVITY_VEC_W) # type: ignore
return base_up_vec[:, 2].unsqueeze(-1)
def base_heading_proj(
env: HumanoidEnv, target_pos: tuple[float, float, float], asset_cfg: SceneEntityCfg = SceneEntityCfg("robot")
) -> torch.Tensor:
"""Projection of the base forward vector onto the world forward vector."""
# extract the used quantities (to enable type-hinting)
asset: Articulation = env.scene[asset_cfg.name]
# compute desired heading direction
to_target_pos = torch.tensor(target_pos, device=env.device) - asset.data.root_pos_w[:, :3]
to_target_pos[:, 2] = 0.0
to_target_dir = math_utils.normalize(to_target_pos)
# compute base forward vector
heading_vec = math_utils.quat_rotate(asset.data.root_quat_w, asset._FORWARD_VEC_B) # type: ignore
# compute dot product between heading and target direction
heading_proj = torch.bmm(heading_vec.view(env.num_envs, 1, 3), to_target_dir.view(env.num_envs, 3, 1))
return heading_proj.view(env.num_envs, 1)
def base_angle_to_target(
env: HumanoidEnv, target_pos: tuple[float, float, float], asset_cfg: SceneEntityCfg = SceneEntityCfg("robot")
) -> torch.Tensor:
"""Angle between the base forward vector and the vector to the target."""
# extract the used quantities (to enable type-hinting)
asset: Articulation = env.scene[asset_cfg.name]
# compute desired heading direction
to_target_pos = torch.tensor(target_pos, device=env.device) - asset.data.root_pos_w[:, :3]
walk_target_angle = torch.atan2(to_target_pos[:, 1], to_target_pos[:, 0])
# compute base forward vector
_, _, yaw = math_utils.euler_xyz_from_quat(asset.data.root_quat_w)
# normalize angle to target to [-pi, pi]
angle_to_target = walk_target_angle - yaw
angle_to_target = torch.atan2(torch.sin(angle_to_target), torch.cos(angle_to_target))
return angle_to_target.unsqueeze(-1)
# Copyright (c) 2022-2023, The ORBIT Project Developers.
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
from __future__ import annotations
import torch
from typing import TYPE_CHECKING
import omni.isaac.orbit.utils.math as math_utils
import omni.isaac.orbit.utils.string as string_utils
from omni.isaac.orbit.assets import Articulation
from omni.isaac.orbit.managers import ManagerTermBase, RewardTermCfg, SceneEntityCfg
from . import observations as obs
if TYPE_CHECKING:
from ..humanoid_env import HumanoidEnv
def upright_posture_bonus(
env: HumanoidEnv, threshold: float, asset_cfg: SceneEntityCfg = SceneEntityCfg("robot")
) -> torch.Tensor:
"""Reward for maintaining an upright posture."""
up_proj = obs.base_up_proj(env, asset_cfg).squeeze(-1)
return (up_proj > threshold).float()
def move_to_target_bonus(
env: HumanoidEnv,
threshold: float,
target_pos: tuple[float, float, float],
asset_cfg: SceneEntityCfg = SceneEntityCfg("robot"),
) -> torch.Tensor:
"""Reward for moving to the target heading."""
heading_proj = obs.base_heading_proj(env, target_pos, asset_cfg).squeeze(-1)
return torch.where(heading_proj > threshold, 1.0, heading_proj / threshold)
class progress_reward(ManagerTermBase):
"""Reward for making progress towards the target."""
def __init__(self, env: HumanoidEnv, cfg: RewardTermCfg):
# initialize the base class
super().__init__(cfg, env)
# create history buffer
self.potentials = torch.zeros(env.num_envs, device=env.device)
self.prev_potentials = torch.zeros_like(self.potentials)
def reset(self, env_ids: torch.Tensor):
# extract the used quantities (to enable type-hinting)
asset: Articulation = self._env.scene["robot"]
# compute projection of current heading to desired heading vector
target_pos = torch.tensor(self.cfg.params["target_pos"], device=self.device)
to_target_pos = target_pos - asset.data.root_pos_w[env_ids, :3]
# reward terms
self.potentials[env_ids] = -torch.norm(to_target_pos, p=2, dim=-1) / self._env.step_dt
self.prev_potentials[env_ids] = self.potentials[env_ids]
def __call__(
self,
env: HumanoidEnv,
target_pos: tuple[float, float, float],
asset_cfg: SceneEntityCfg = SceneEntityCfg("robot"),
) -> torch.Tensor:
# extract the used quantities (to enable type-hinting)
asset: Articulation = env.scene[asset_cfg.name]
# compute vector to target
target_pos = torch.tensor(target_pos, device=env.device)
to_target_pos = target_pos - asset.data.root_pos_w[:, :3]
to_target_pos[:, 2] = 0.0
# update history buffer and compute new potential
self.prev_potentials[:] = self.potentials[:]
self.potentials[:] = -torch.norm(to_target_pos, p=2, dim=-1) / env.step_dt
return self.potentials - self.prev_potentials
class joint_limits_penalty_ratio(ManagerTermBase):
"""Penalty for violating joint limits weighted by the gear ratio."""
def __init__(self, env: HumanoidEnv, cfg: RewardTermCfg):
# add default argument
if "asset_cfg" not in cfg.params:
cfg.params["asset_cfg"] = SceneEntityCfg("robot")
# extract the used quantities (to enable type-hinting)
asset: Articulation = env.scene[cfg.params["asset_cfg"].name]
# resolve the gear ratio for each joint
self.gear_ratio = torch.ones(env.num_envs, asset.num_joints, device=env.device)
index_list, _, value_list = string_utils.resolve_matching_names_values(
cfg.params["gear_ratio"], asset.joint_names
)
self.gear_ratio[:, index_list] = torch.tensor(value_list, device=env.device)
self.gear_ratio_scaled = self.gear_ratio / torch.max(self.gear_ratio)
def __call__(
self, env: HumanoidEnv, threshold: float, gear_ratio: dict[str, float], asset_cfg: SceneEntityCfg
) -> torch.Tensor:
# extract the used quantities (to enable type-hinting)
asset: Articulation = env.scene[asset_cfg.name]
# compute the penalty over normalized joints
joint_pos_scaled = math_utils.scale_transform(
asset.data.joint_pos, asset.data.soft_joint_pos_limits[..., 0], asset.data.soft_joint_pos_limits[..., 1]
)
# scale the violation amount by the gear ratio
violation_amount = (torch.abs(joint_pos_scaled) - threshold) / (1 - threshold)
violation_amount = violation_amount * self.gear_ratio_scaled
return torch.sum((torch.abs(joint_pos_scaled) > threshold) * violation_amount, dim=-1)
class power_consumption(ManagerTermBase):
"""Penalty for the power consumed by the actions to the environment.
This is computed as commanded torque times the joint velocity.
"""
def __init__(self, env: HumanoidEnv, cfg: RewardTermCfg):
# add default argument
if "asset_cfg" not in cfg.params:
cfg.params["asset_cfg"] = SceneEntityCfg("robot")
# extract the used quantities (to enable type-hinting)
asset: Articulation = env.scene[cfg.params["asset_cfg"].name]
# resolve the gear ratio for each joint
self.gear_ratio = torch.ones(env.num_envs, asset.num_joints, device=env.device)
index_list, _, value_list = string_utils.resolve_matching_names_values(
cfg.params["gear_ratio"], asset.joint_names
)
self.gear_ratio[:, index_list] = torch.tensor(value_list, device=env.device)
self.gear_ratio_scaled = self.gear_ratio / torch.max(self.gear_ratio)
def __call__(self, env: HumanoidEnv, gear_ratio: dict[str, float], asset_cfg: SceneEntityCfg) -> torch.Tensor:
# extract the used quantities (to enable type-hinting)
asset: Articulation = env.scene[asset_cfg.name]
# return power = torque * velocity (here actions: joint torques)
return torch.sum(torch.abs(env.action_manager.action * asset.data.joint_vel * self.gear_ratio_scaled), dim=-1)
...@@ -241,6 +241,10 @@ class RlGamesVecEnvWrapper(IVecEnv): ...@@ -241,6 +241,10 @@ class RlGamesVecEnvWrapper(IVecEnv):
actions = torch.clamp(actions, -self._clip_actions, self._clip_actions) actions = torch.clamp(actions, -self._clip_actions, self._clip_actions)
# perform environment step # perform environment step
obs_dict, rew, terminated, truncated, extras = self.env.step(actions) obs_dict, rew, terminated, truncated, extras = self.env.step(actions)
# move time out information to the extras dict
# note: only useful when `value_bootstrap` is True in the agent configuration
extras["time_outs"] = truncated.to(device=self._rl_device)
# process observations and states # process observations and states
obs_and_states = self._process_obs(obs_dict) obs_and_states = self._process_obs(obs_dict)
# move buffers to rl-device # move buffers to rl-device
......
...@@ -48,6 +48,8 @@ from rl_games.common import env_configurations, vecenv ...@@ -48,6 +48,8 @@ from rl_games.common import env_configurations, vecenv
from rl_games.common.player import BasePlayer from rl_games.common.player import BasePlayer
from rl_games.torch_runner import Runner from rl_games.torch_runner import Runner
from omni.isaac.orbit.utils.assets import retrieve_file_path
import omni.isaac.contrib_tasks # noqa: F401 import omni.isaac.contrib_tasks # noqa: F401
import omni.isaac.orbit_tasks # noqa: F401 import omni.isaac.orbit_tasks # noqa: F401
from omni.isaac.orbit_tasks.utils import get_checkpoint_path, load_cfg_from_registry, parse_env_cfg from omni.isaac.orbit_tasks.utils import get_checkpoint_path, load_cfg_from_registry, parse_env_cfg
...@@ -94,7 +96,7 @@ def main(): ...@@ -94,7 +96,7 @@ def main():
# get path to previous checkpoint # get path to previous checkpoint
resume_path = get_checkpoint_path(log_root_path, run_dir, checkpoint_file, other_dirs=["nn"]) resume_path = get_checkpoint_path(log_root_path, run_dir, checkpoint_file, other_dirs=["nn"])
else: else:
resume_path = os.path.abspath(args_cli.checkpoint) resume_path = retrieve_file_path(args_cli.checkpoint)
# load previously trained model # load previously trained model
agent_cfg["params"]["load_checkpoint"] = True agent_cfg["params"]["load_checkpoint"] = True
agent_cfg["params"]["load_path"] = resume_path agent_cfg["params"]["load_path"] = resume_path
...@@ -109,8 +111,6 @@ def main(): ...@@ -109,8 +111,6 @@ def main():
agent: BasePlayer = runner.create_player() agent: BasePlayer = runner.create_player()
agent.restore(resume_path) agent.restore(resume_path)
agent.reset() agent.reset()
# flag for inference
is_deterministic = True
# reset environment # reset environment
obs = env.reset() obs = env.reset()
...@@ -126,7 +126,7 @@ def main(): ...@@ -126,7 +126,7 @@ def main():
# convert obs to agent format # convert obs to agent format
obs = agent.obs_to_torch(obs) obs = agent.obs_to_torch(obs)
# agent stepping # agent stepping
actions = agent.get_action(obs, is_deterministic) actions = agent.get_action(obs, is_deterministic=True)
# env stepping # env stepping
obs, _, dones, _ = env.step(actions) obs, _, dones, _ = env.step(actions)
......
...@@ -128,7 +128,7 @@ def main(): ...@@ -128,7 +128,7 @@ def main():
agent.set_logger(new_logger) agent.set_logger(new_logger)
# callbacks for agent # callbacks for agent
checkpoint_callback = CheckpointCallback(save_freq=100, save_path=log_dir, name_prefix="model", verbose=2) checkpoint_callback = CheckpointCallback(save_freq=1000, save_path=log_dir, name_prefix="model", verbose=2)
# train the agent # train the agent
agent.learn(total_timesteps=n_timesteps, callback=checkpoint_callback) agent.learn(total_timesteps=n_timesteps, callback=checkpoint_callback)
# save the final model # save the final model
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment