Unverified Commit 692b1779 authored by Mayank Mittal's avatar Mayank Mittal Committed by GitHub

Renames Unitree configs in locomotion tasks to match properly (#714)

# Description

There was a slight inconsistency: some configs were called `unitree_a1`
and `unitree_go1`, while others were called just `h1` and `g1`. This MR
fixes this inconsistency and adds some more docs.

## Type of change

- This change requires a documentation update

## Checklist

- [x] I have run the [`pre-commit` checks](https://pre-commit.com/) with
`./isaaclab.sh --format`
- [ ] I have made corresponding changes to the documentation
- [x] My changes generate no new warnings
- [ ] I have added tests that prove my fix is effective or that my
feature works
- [x] I have updated the changelog and the corresponding version in the
extension's `config/extension.toml` file
- [x] I have added my name to the `CONTRIBUTORS.md` or my name already
exists there
parent 112036e9
[package] [package]
# Note: Semantic Versioning is used: https://semver.org/ # Note: Semantic Versioning is used: https://semver.org/
version = "0.8.0" version = "0.8.1"
# Description # Description
title = "Isaac Lab Environments" title = "Isaac Lab Environments"
......
Changelog Changelog
--------- ---------
0.8.1 (2024-08-02)
~~~~~~~~~~~~~~~~~~
Changed
^^^^^^^
* Renamed the folders for Unitree robots in the manager-based locomotion tasks. Earlier, the folder names were
inconsistent, as some had the ``unitree_`` prefix and some didn't. Now, none of the folders have the prefix.
0.8.0 (2024-07-26) 0.8.0 (2024-07-26)
~~~~~~~~~~~~~~~~~~ ~~~~~~~~~~~~~~~~~~
......
...@@ -22,6 +22,8 @@ from omni.isaac.lab_assets import G1_MINIMAL_CFG # isort: skip ...@@ -22,6 +22,8 @@ from omni.isaac.lab_assets import G1_MINIMAL_CFG # isort: skip
@configclass @configclass
class G1Rewards(RewardsCfg): class G1Rewards(RewardsCfg):
"""Reward terms for the MDP."""
termination_penalty = RewTerm(func=mdp.is_terminated, weight=-200.0) termination_penalty = RewTerm(func=mdp.is_terminated, weight=-200.0)
track_lin_vel_xy_exp = RewTerm( track_lin_vel_xy_exp = RewTerm(
func=mdp.track_lin_vel_xy_yaw_frame_exp, func=mdp.track_lin_vel_xy_yaw_frame_exp,
......
...@@ -22,6 +22,8 @@ from omni.isaac.lab_assets import H1_MINIMAL_CFG # isort: skip ...@@ -22,6 +22,8 @@ from omni.isaac.lab_assets import H1_MINIMAL_CFG # isort: skip
@configclass @configclass
class H1Rewards(RewardsCfg): class H1Rewards(RewardsCfg):
"""Reward terms for the MDP."""
termination_penalty = RewTerm(func=mdp.is_terminated, weight=-200.0) termination_penalty = RewTerm(func=mdp.is_terminated, weight=-200.0)
lin_vel_z_l2 = None lin_vel_z_l2 = None
track_lin_vel_xy_exp = RewTerm( track_lin_vel_xy_exp = RewTerm(
......
We would like to acknowledge The AI Institute's efforts in developing the Spot MDP from specifications provided by Boston Dynamics. # Acknowledgment
They trained, verified, and deployed the resulting policy on the Spot hardware and demonstrated its capability and reliability out in the real world.
The accompanying deployment code and access to Spot's low-level API will be available in the Spot RL Researcher Kit.
We thank The AI Institute for their trailblazing use of and contributions to Isaac Lab and for sharing their code publicly with the community to promote wider use of the Nvidia RL ecosystem. We would like to acknowledge [The AI Institute](https://theaiinstitute.com/)'s efforts in developing
the Spot RL environment from the specifications provided by Boston Dynamics.
The team at The AI Institute trained, verified, and deployed the resulting policy on the Spot hardware.
They demonstrated its capability and reliability out in the real world.
The accompanying deployment code and access to Spot's low-level API are available with the [Spot RL
Researcher Kit](https://bostondynamics.com/reinforcement-learning-researcher-kit/).
...@@ -286,8 +286,8 @@ class SpotTerminationsCfg: ...@@ -286,8 +286,8 @@ class SpotTerminationsCfg:
func=mdp.illegal_contact, func=mdp.illegal_contact,
params={"sensor_cfg": SceneEntityCfg("contact_forces", body_names=["body", ".*leg"]), "threshold": 1.0}, params={"sensor_cfg": SceneEntityCfg("contact_forces", body_names=["body", ".*leg"]), "threshold": 1.0},
) )
out_of_bounds = DoneTerm( terrain_out_of_bounds = DoneTerm(
func=spot_mdp.terminations.terrain_out_of_bounds, func=mdp.terrain_out_of_bounds,
params={"asset_cfg": SceneEntityCfg("robot"), "distance_buffer": 3.0}, params={"asset_cfg": SceneEntityCfg("robot"), "distance_buffer": 3.0},
time_out=True, time_out=True,
) )
......
...@@ -8,4 +8,3 @@ ...@@ -8,4 +8,3 @@
from .events import * # noqa: F401, F403 from .events import * # noqa: F401, F403
from .rewards import * # noqa: F401, F403 from .rewards import * # noqa: F401, F403
from .terminations import * # noqa: F401, F403
...@@ -3,6 +3,11 @@ ...@@ -3,6 +3,11 @@
# #
# SPDX-License-Identifier: BSD-3-Clause # SPDX-License-Identifier: BSD-3-Clause
"""This sub-module contains the reward functions that can be used for Spot's locomotion task.
The functions can be passed to the :class:`omni.isaac.lab.managers.RewardTermCfg` object to
specify the reward function and its parameters.
"""
from __future__ import annotations from __future__ import annotations
...@@ -18,7 +23,9 @@ if TYPE_CHECKING: ...@@ -18,7 +23,9 @@ if TYPE_CHECKING:
from omni.isaac.lab.managers import RewardTermCfg from omni.isaac.lab.managers import RewardTermCfg
# -- Task Rewards ##
# Task Rewards
##
def air_time_reward( def air_time_reward(
...@@ -28,7 +35,7 @@ def air_time_reward( ...@@ -28,7 +35,7 @@ def air_time_reward(
mode_time: float, mode_time: float,
velocity_threshold: float, velocity_threshold: float,
) -> torch.Tensor: ) -> torch.Tensor:
"""Reward longer feet air and contact time""" """Reward longer feet air and contact time."""
# extract the used quantities (to enable type-hinting) # extract the used quantities (to enable type-hinting)
contact_sensor: ContactSensor = env.scene.sensors[sensor_cfg.name] contact_sensor: ContactSensor = env.scene.sensors[sensor_cfg.name]
asset: Articulation = env.scene[asset_cfg.name] asset: Articulation = env.scene[asset_cfg.name]
...@@ -146,6 +153,10 @@ class GaitReward(ManagerTermBase): ...@@ -146,6 +153,10 @@ class GaitReward(ManagerTermBase):
torch.logical_or(cmd > 0.0, body_vel > self.velocity_threshold), sync_reward * async_reward, 0.0 torch.logical_or(cmd > 0.0, body_vel > self.velocity_threshold), sync_reward * async_reward, 0.0
) )
"""
Helper functions.
"""
def _sync_reward_func(self, foot_0: int, foot_1: int) -> torch.Tensor: def _sync_reward_func(self, foot_0: int, foot_1: int) -> torch.Tensor:
"""Reward synchronization of two feet.""" """Reward synchronization of two feet."""
air_time = self.contact_sensor.data.current_air_time air_time = self.contact_sensor.data.current_air_time
...@@ -177,7 +188,9 @@ def foot_clearance_reward( ...@@ -177,7 +188,9 @@ def foot_clearance_reward(
return torch.exp(-torch.sum(reward, dim=1) / std) return torch.exp(-torch.sum(reward, dim=1) / std)
# -- Regularization Penalties ##
# Regularization Penalties
##
def action_smoothness_penalty(env: ManagerBasedRLEnv) -> torch.Tensor: def action_smoothness_penalty(env: ManagerBasedRLEnv) -> torch.Tensor:
......
...@@ -9,3 +9,4 @@ from omni.isaac.lab.envs.mdp import * # noqa: F401, F403 ...@@ -9,3 +9,4 @@ from omni.isaac.lab.envs.mdp import * # noqa: F401, F403
from .curriculums import * # noqa: F401, F403 from .curriculums import * # noqa: F401, F403
from .rewards import * # noqa: F401, F403 from .rewards import * # noqa: F401, F403
from .terminations import * # noqa: F401, F403
...@@ -3,6 +3,12 @@ ...@@ -3,6 +3,12 @@
# #
# SPDX-License-Identifier: BSD-3-Clause # SPDX-License-Identifier: BSD-3-Clause
"""Common functions that can be used to define rewards for the learning environment.
The functions can be passed to the :class:`omni.isaac.lab.managers.RewardTermCfg` object to
specify the reward function and its parameters.
"""
from __future__ import annotations from __future__ import annotations
import torch import torch
...@@ -61,6 +67,12 @@ def feet_air_time_positive_biped(env, command_name: str, threshold: float, senso ...@@ -61,6 +67,12 @@ def feet_air_time_positive_biped(env, command_name: str, threshold: float, senso
def feet_slide(env, sensor_cfg: SceneEntityCfg, asset_cfg: SceneEntityCfg = SceneEntityCfg("robot")) -> torch.Tensor: def feet_slide(env, sensor_cfg: SceneEntityCfg, asset_cfg: SceneEntityCfg = SceneEntityCfg("robot")) -> torch.Tensor:
"""Penalize feet sliding.
This function penalizes the agent for sliding its feet on the ground. The reward is computed as the
norm of the linear velocity of the feet multiplied by a binary contact sensor. This ensures that the
agent is penalized only when the feet are in contact with the ground.
"""
# Penalize feet sliding # Penalize feet sliding
contact_sensor: ContactSensor = env.scene.sensors[sensor_cfg.name] contact_sensor: ContactSensor = env.scene.sensors[sensor_cfg.name]
contacts = contact_sensor.data.net_forces_w_history[:, :, sensor_cfg.body_ids, :].norm(dim=-1).max(dim=1)[0] > 1.0 contacts = contact_sensor.data.net_forces_w_history[:, :, sensor_cfg.body_ids, :].norm(dim=-1).max(dim=1)[0] > 1.0
......
...@@ -20,33 +20,33 @@ from omni.isaac.lab.managers import SceneEntityCfg ...@@ -20,33 +20,33 @@ from omni.isaac.lab.managers import SceneEntityCfg
if TYPE_CHECKING: if TYPE_CHECKING:
from omni.isaac.lab.envs import ManagerBasedRLEnv from omni.isaac.lab.envs import ManagerBasedRLEnv
"""
Terrain size limits.
"""
def terrain_out_of_bounds( def terrain_out_of_bounds(
env: ManagerBasedRLEnv, asset_cfg: SceneEntityCfg = SceneEntityCfg("robot"), distance_buffer: float = 3.0 env: ManagerBasedRLEnv, asset_cfg: SceneEntityCfg = SceneEntityCfg("robot"), distance_buffer: float = 3.0
) -> torch.Tensor: ) -> torch.Tensor:
"""Terminate when agents move too close to the edge of the terrain.""" """Terminate when the actor moves too close to the edge of the terrain.
# extract the used quantities (to enable type-hinting)
asset: RigidObject = env.scene[asset_cfg.name]
def get_map_size(env: ManagerBasedRLEnv) -> tuple[float, float]:
grid_width, grid_length = env.scene.terrain.cfg.terrain_generator.size
n_cols = env.scene.terrain.cfg.terrain_generator.num_cols
n_rows = env.scene.terrain.cfg.terrain_generator.num_rows
border_width = env.scene.terrain.cfg.terrain_generator.border_width
length = n_cols * grid_length + 2 * border_width
width = n_rows * grid_width + 2 * border_width
return (width, length)
If the actor moves too close to the edge of the terrain, the termination is activated. The distance
to the edge of the terrain is calculated based on the size of the terrain and the distance buffer.
"""
if env.scene.cfg.terrain.terrain_type == "plane": if env.scene.cfg.terrain.terrain_type == "plane":
return False return False # we have infinite terrain because it is a plane
elif env.scene.cfg.terrain.terrain_type == "generator": elif env.scene.cfg.terrain.terrain_type == "generator":
map_width, map_height = get_map_size(env) # obtain the size of the sub-terrains
terrain_gen_cfg = env.scene.terrain.cfg.terrain_generator
grid_width, grid_length = terrain_gen_cfg.size
n_rows, n_cols = terrain_gen_cfg.num_rows, terrain_gen_cfg.num_cols
border_width = terrain_gen_cfg.border_width
# compute the size of the map
map_width = n_rows * grid_width + 2 * border_width
map_height = n_cols * grid_length + 2 * border_width
# extract the used quantities (to enable type-hinting)
asset: RigidObject = env.scene[asset_cfg.name]
# check if the agent is out of bounds
x_out_of_bounds = torch.abs(asset.data.root_pos_w[:, 0]) > 0.5 * map_width - distance_buffer x_out_of_bounds = torch.abs(asset.data.root_pos_w[:, 0]) > 0.5 * map_width - distance_buffer
y_out_of_bounds = torch.abs(asset.data.root_pos_w[:, 1]) > 0.5 * map_height - distance_buffer y_out_of_bounds = torch.abs(asset.data.root_pos_w[:, 1]) > 0.5 * map_height - distance_buffer
return torch.logical_or(x_out_of_bounds, y_out_of_bounds) return torch.logical_or(x_out_of_bounds, y_out_of_bounds)
else: else:
raise ValueError("Received unsupported terrain type, must be either 'plane' or 'generator'") raise ValueError("Received unsupported terrain type, must be either 'plane' or 'generator'.")
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment