Unverified commit 61c43084, authored by Farbod Farshidian and committed by GitHub

Improves Spot gait reward (#451)

Adds a new gait reward for Spot in which the synchronized foot pairs are selected by foot body name.

## Type of change

- New feature (non-breaking change which adds functionality)

## Checklist

- [x] I have run the [`pre-commit` checks](https://pre-commit.com/) with
`./isaaclab.sh --format`
- [x] I have made corresponding changes to the documentation
- [x] My changes generate no new warnings
- [x] I have added tests that prove my fix is effective or that my
feature works
- [ ] I have run all the tests with `./isaaclab.sh --test` and they pass
- [ ] I have updated the changelog and the corresponding version in the
extension's `config/extension.toml` file
- [ ] I have added my name to the `CONTRIBUTORS.md` or my name already
exists there
parent 8816fb76
......@@ -15,7 +15,7 @@ from omni.isaac.lab_tasks.utils.wrappers.rsl_rl import (
@configclass
class SpotFlatPPORunnerCfg(RslRlOnPolicyRunnerCfg):
num_steps_per_env = 24
max_iterations = 35000
max_iterations = 20000
save_interval = 50
experiment_name = "spot_flat"
empirical_normalization = False
......
......@@ -64,7 +64,7 @@ class SpotCommandsCfg:
heading_command=False,
debug_vis=True,
ranges=mdp.UniformVelocityCommandCfg.Ranges(
lin_vel_x=(-2.0, 4.0), lin_vel_y=(-1.5, 1.5), ang_vel_z=(-2.0, 2.0)
lin_vel_x=(-2.0, 3.0), lin_vel_y=(-1.5, 1.5), ang_vel_z=(-2.0, 2.0)
),
)
......@@ -189,7 +189,12 @@ class SpotRewardsCfg:
air_time = RewardTermCfg(
func=spot_mdp.air_time_reward,
weight=5.0,
params={"mode_time": 0.3, "sensor_cfg": SceneEntityCfg("contact_forces", body_names=".*_foot")},
params={
"mode_time": 0.3,
"velocity_threshold": 0.5,
"asset_cfg": SceneEntityCfg("robot"),
"sensor_cfg": SceneEntityCfg("contact_forces", body_names=".*_foot"),
},
)
base_angular_velocity = RewardTermCfg(
func=spot_mdp.base_angular_velocity_reward,
......@@ -212,9 +217,16 @@ class SpotRewardsCfg:
},
)
gait = RewardTermCfg(
func=spot_mdp.gait_reward,
func=spot_mdp.GaitReward,
weight=10.0,
params={"std": 0.1, "sensor_cfg": SceneEntityCfg("contact_forces", body_names=".*_foot")},
params={
"std": 0.1,
"max_err": 0.2,
"velocity_threshold": 0.5,
"synced_feet_pair_names": (("fl_foot", "hr_foot"), ("fr_foot", "hl_foot")),
"asset_cfg": SceneEntityCfg("robot"),
"sensor_cfg": SceneEntityCfg("contact_forces"),
},
)
# -- penalties
......@@ -247,7 +259,11 @@ class SpotRewardsCfg:
joint_pos = RewardTermCfg(
func=spot_mdp.joint_position_penalty,
weight=-0.7,
params={"asset_cfg": SceneEntityCfg("robot", joint_names=".*"), "stand_still_scale": 5.0},
params={
"asset_cfg": SceneEntityCfg("robot", joint_names=".*"),
"stand_still_scale": 5.0,
"velocity_threshold": 0.5,
},
)
joint_torques = RewardTermCfg(
func=spot_mdp.joint_torques_penalty,
......
......@@ -10,19 +10,28 @@ import torch
from typing import TYPE_CHECKING
from omni.isaac.lab.assets import Articulation, RigidObject
from omni.isaac.lab.managers import SceneEntityCfg
from omni.isaac.lab.managers import ManagerTermBase, SceneEntityCfg
from omni.isaac.lab.sensors import ContactSensor
if TYPE_CHECKING:
from omni.isaac.lab.envs import ManagerBasedRLEnv
from omni.isaac.lab.managers import RewardTermCfg
# -- Task Rewards
def air_time_reward(env: ManagerBasedRLEnv, sensor_cfg: SceneEntityCfg, mode_time: float) -> torch.Tensor:
def air_time_reward(
env: ManagerBasedRLEnv,
asset_cfg: SceneEntityCfg,
sensor_cfg: SceneEntityCfg,
mode_time: float,
velocity_threshold: float,
) -> torch.Tensor:
"""Reward longer feet air and contact time"""
# extract the used quantities (to enable type-hinting)
contact_sensor: ContactSensor = env.scene.sensors[sensor_cfg.name]
asset: Articulation = env.scene[asset_cfg.name]
if contact_sensor.cfg.track_air_time is False:
raise RuntimeError("Activate ContactSensor's track_air_time!")
# compute the reward
......@@ -33,7 +42,12 @@ def air_time_reward(env: ManagerBasedRLEnv, sensor_cfg: SceneEntityCfg, mode_tim
t_min = torch.clip(t_max, max=mode_time)
stance_cmd_reward = torch.clip(current_contact_time - current_air_time, -mode_time, mode_time)
cmd = torch.norm(env.command_manager.get_command("base_velocity"), dim=1).unsqueeze(dim=1).expand(-1, 4)
reward = torch.where(cmd > 0.0, torch.where(t_max < mode_time, t_min, 0), stance_cmd_reward)
body_vel = torch.linalg.norm(asset.data.root_lin_vel_b[:, :2], dim=1).unsqueeze(dim=1).expand(-1, 4)
reward = torch.where(
torch.logical_or(cmd > 0.0, body_vel > velocity_threshold),
torch.where(t_max < mode_time, t_min, 0),
stance_cmd_reward,
)
return torch.sum(reward, dim=1)
......@@ -62,41 +76,94 @@ def base_linear_velocity_reward(
return torch.exp(-lin_vel_error / std) * velocity_scaling_multiple
# ! need to finalize logic, params, and docstring
def gait_reward(
    env: ManagerBasedRLEnv, sensor_cfg: SceneEntityCfg, std: float, max_err: float = 0.2
) -> torch.Tensor:
    """Reward a two-pair gait based on the feet's current air and contact times.

    Feet are addressed by their position (0-3) within ``sensor_cfg.body_ids``:

    * Feet (0, 3) and (1, 2) are treated as synchronized pairs: their air times should match and
      their contact times should match.
    * Feet (0, 1), (2, 3), (0, 2) and (1, 3) are treated as alternating pairs: the air time of one
      foot should match the contact time of the other, and vice versa.

    Every squared timing difference is clipped at ``max_err**2`` before the errors are summed.

    Args:
        env: The RL environment instance.
        sensor_cfg: The contact sensor entity. Its ``body_ids`` must select at least four bodies,
            since columns 0-3 of the selected data are used.
        std: Scale of the exponential kernel applied to the summed error.
        max_err: Cap on each individual timing difference (squared internally). Defaults to 0.2,
            the value that used to be hard-coded.

    Returns:
        ``exp(-gait_err / std)`` where the norm of the ``base_velocity`` command is greater than
        zero, and 0 elsewhere.

    Raises:
        RuntimeError: If the contact sensor is configured with ``track_air_time=False``.
    """
    # extract the used quantities (to enable type-hinting)
    contact_sensor: ContactSensor = env.scene.sensors[sensor_cfg.name]
    if contact_sensor.cfg.track_air_time is False:
        raise RuntimeError("Activate ContactSensor's track_air_time!")

    air_time = contact_sensor.data.current_air_time[:, sensor_cfg.body_ids]
    contact_time = contact_sensor.data.current_contact_time[:, sensor_cfg.body_ids]
    max_sq_err = max_err**2

    def _clipped_sq_diff(lhs: torch.Tensor, rhs: torch.Tensor) -> torch.Tensor:
        # squared difference, saturated so a single large mismatch cannot dominate the sum
        return torch.clip(torch.square(lhs - rhs), max=max_sq_err)

    def _async_err(foot_a: int, foot_b: int) -> torch.Tensor:
        # alternating feet: air time of one foot vs. contact time of the other
        return _clipped_sq_diff(air_time[:, foot_a], contact_time[:, foot_b]) + _clipped_sq_diff(
            contact_time[:, foot_a], air_time[:, foot_b]
        )

    def _sync_err(foot_a: int, foot_b: int) -> torch.Tensor:
        # synchronized feet: air vs. air and contact vs. contact
        return _clipped_sq_diff(air_time[:, foot_a], air_time[:, foot_b]) + _clipped_sq_diff(
            contact_time[:, foot_a], contact_time[:, foot_b]
        )

    # same terms, in the same summation order, as the original hand-expanded expression
    gait_err = (
        _async_err(0, 1) + _async_err(2, 3) + _sync_err(0, 3) + _sync_err(1, 2) + _async_err(0, 2) + _async_err(1, 3)
    )

    # the gait is only rewarded when a non-zero velocity command is active
    cmd = torch.norm(env.command_manager.get_command("base_velocity"), dim=1)
    return torch.where(cmd > 0.0, torch.exp(-gait_err / std), 0.0)
class GaitReward(ManagerTermBase):
    """Gait enforcing reward term for quadrupeds.

    This reward penalizes contact timing differences between selected foot pairs defined in
    :attr:`synced_feet_pair_names` to bias the policy towards a desired gait, e.g. trotting, bounding, or pacing.
    Note that this reward is only for quadrupedal gaits with two pairs of synchronized feet.
    """

    def __init__(self, cfg: RewardTermCfg, env: ManagerBasedRLEnv):
        """Initialize the term.

        Args:
            cfg: The configuration of the reward. Its ``params`` must provide ``std``, ``max_err``,
                ``velocity_threshold``, ``synced_feet_pair_names``, ``asset_cfg`` and ``sensor_cfg``.
            env: The RL environment instance.

        Raises:
            RuntimeError: If the contact sensor is configured with ``track_air_time=False``.
            ValueError: If ``synced_feet_pair_names`` is not two pairs of two names each.
        """
        super().__init__(cfg, env)
        self.std: float = cfg.params["std"]
        self.max_err: float = cfg.params["max_err"]
        self.velocity_threshold: float = cfg.params["velocity_threshold"]
        self.contact_sensor: ContactSensor = env.scene.sensors[cfg.params["sensor_cfg"].name]
        self.asset: Articulation = env.scene[cfg.params["asset_cfg"].name]
        # the reward reads the sensor's air/contact time buffers, so they must be tracked
        # (same guard as the other air-time based reward functions in this module)
        if self.contact_sensor.cfg.track_air_time is False:
            raise RuntimeError("Activate ContactSensor's track_air_time!")
        # match foot body names with corresponding foot body ids
        synced_feet_pair_names = cfg.params["synced_feet_pair_names"]
        if (
            len(synced_feet_pair_names) != 2
            or len(synced_feet_pair_names[0]) != 2
            or len(synced_feet_pair_names[1]) != 2
        ):
            raise ValueError("This reward only supports gaits with two pairs of synchronized feet, like trotting.")
        # find_bodies(...)[0] holds the body ids matched on the contact sensor for the given names
        synced_feet_pair_0 = self.contact_sensor.find_bodies(synced_feet_pair_names[0])[0]
        synced_feet_pair_1 = self.contact_sensor.find_bodies(synced_feet_pair_names[1])[0]
        self.synced_feet_pairs = [synced_feet_pair_0, synced_feet_pair_1]

    def __call__(
        self,
        env: ManagerBasedRLEnv,
        std: float,
        max_err: float,
        velocity_threshold: float,
        synced_feet_pair_names,
        asset_cfg: SceneEntityCfg,
        sensor_cfg: SceneEntityCfg,
    ) -> torch.Tensor:
        """Compute the reward.

        This reward is defined as a multiplication between six terms: two of them reward the feet of each
        synchronized pair for being in sync, and the other four reward every remaining foot pairing for being
        out of sync.

        Args:
            env: The RL environment instance.
            std: Unused here; the value stored at construction is used.
            max_err: Unused here; the value stored at construction is used.
            velocity_threshold: Unused here; the value stored at construction is used.
            synced_feet_pair_names: Unused here; the body ids resolved at construction are used.
            asset_cfg: Unused here; the asset resolved at construction is used.
            sensor_cfg: Unused here; the contact sensor resolved at construction is used.

        Returns:
            The reward value. It is zero where the ``base_velocity`` command norm is zero and the planar
            body-frame root velocity does not exceed the velocity threshold.
        """
        # for synchronous feet, the contact (air) times of two feet should match
        sync_reward_0 = self._sync_reward_func(self.synced_feet_pairs[0][0], self.synced_feet_pairs[0][1])
        sync_reward_1 = self._sync_reward_func(self.synced_feet_pairs[1][0], self.synced_feet_pairs[1][1])
        sync_reward = sync_reward_0 * sync_reward_1
        # for asynchronous feet, the contact time of one foot should match the air time of the other one
        async_reward_0 = self._async_reward_func(self.synced_feet_pairs[0][0], self.synced_feet_pairs[1][0])
        async_reward_1 = self._async_reward_func(self.synced_feet_pairs[0][1], self.synced_feet_pairs[1][1])
        async_reward_2 = self._async_reward_func(self.synced_feet_pairs[0][0], self.synced_feet_pairs[1][1])
        async_reward_3 = self._async_reward_func(self.synced_feet_pairs[1][0], self.synced_feet_pairs[0][1])
        async_reward = async_reward_0 * async_reward_1 * async_reward_2 * async_reward_3
        # only enforce gait if cmd > 0 or the body is still moving faster than the velocity threshold
        cmd = torch.norm(env.command_manager.get_command("base_velocity"), dim=1)
        body_vel = torch.linalg.norm(self.asset.data.root_lin_vel_b[:, :2], dim=1)
        return torch.where(
            torch.logical_or(cmd > 0.0, body_vel > self.velocity_threshold), sync_reward * async_reward, 0.0
        )

    def _sync_reward_func(self, foot_0: int, foot_1: int) -> torch.Tensor:
        """Reward synchronization of two feet.

        Args:
            foot_0: Body id of the first foot on the contact sensor.
            foot_1: Body id of the second foot on the contact sensor.

        Returns:
            ``exp(-(se_air + se_contact) / std)``, where each squared error is clipped at ``max_err**2``.
        """
        air_time = self.contact_sensor.data.current_air_time
        contact_time = self.contact_sensor.data.current_contact_time
        # penalize the difference between the most recent air time and contact time of synced feet pairs.
        se_air = torch.clip(torch.square(air_time[:, foot_0] - air_time[:, foot_1]), max=self.max_err**2)
        se_contact = torch.clip(torch.square(contact_time[:, foot_0] - contact_time[:, foot_1]), max=self.max_err**2)
        return torch.exp(-(se_air + se_contact) / self.std)

    def _async_reward_func(self, foot_0: int, foot_1: int) -> torch.Tensor:
        """Reward anti-synchronization of two feet.

        Args:
            foot_0: Body id of the first foot on the contact sensor.
            foot_1: Body id of the second foot on the contact sensor.

        Returns:
            ``exp(-(se_act_0 + se_act_1) / std)``, where each squared error is clipped at ``max_err**2``.
        """
        air_time = self.contact_sensor.data.current_air_time
        contact_time = self.contact_sensor.data.current_contact_time
        # penalize the difference between opposing contact modes (air time of foot 0 vs. contact time of foot 1,
        # and contact time of foot 0 vs. air time of foot 1) for feet that are not in sync with each other.
        se_act_0 = torch.clip(torch.square(air_time[:, foot_0] - contact_time[:, foot_1]), max=self.max_err**2)
        se_act_1 = torch.clip(torch.square(contact_time[:, foot_0] - air_time[:, foot_1]), max=self.max_err**2)
        return torch.exp(-(se_act_0 + se_act_1) / self.std)
def foot_clearance_reward(
......@@ -176,14 +243,16 @@ def joint_acceleration_penalty(env: ManagerBasedRLEnv, asset_cfg: SceneEntityCfg
return torch.linalg.norm((asset.data.joint_acc), dim=1)
def joint_position_penalty(
    env: ManagerBasedRLEnv, asset_cfg: SceneEntityCfg, stand_still_scale: float, velocity_threshold: float
) -> torch.Tensor:
    """Penalize joint position error from default on the articulation.

    The penalty is the norm of ``joint_pos - default_joint_pos``. It is multiplied by
    ``stand_still_scale`` when the robot is considered to be standing still, i.e. when the
    ``base_velocity`` command norm is zero and the planar body-frame root velocity does not
    exceed ``velocity_threshold``.

    Args:
        env: The RL environment instance.
        asset_cfg: The articulation entity whose joint positions are penalized.
        stand_still_scale: Multiplier applied to the penalty while standing still.
        velocity_threshold: Planar body speed above which the robot counts as moving.

    Returns:
        The (possibly scaled) joint position error norm.
    """
    # extract the used quantities (to enable type-hinting)
    asset: Articulation = env.scene[asset_cfg.name]
    cmd = torch.linalg.norm(env.command_manager.get_command("base_velocity"), dim=1)
    body_vel = torch.linalg.norm(asset.data.root_lin_vel_b[:, :2], dim=1)
    reward = torch.linalg.norm((asset.data.joint_pos - asset.data.default_joint_pos), dim=1)
    # use the configured threshold instead of a hard-coded 0.5
    is_moving = torch.logical_or(cmd > 0.0, body_vel > velocity_threshold)
    return torch.where(is_moving, reward, stand_still_scale * reward)
def joint_torques_penalty(env: ManagerBasedRLEnv, asset_cfg: SceneEntityCfg) -> torch.Tensor:
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment