Improves Spot gait reward (#451)

Adds a new gait reward for Spot that selects the synchronized foot pairs by foot body name.

## Type of change

- New feature (non-breaking change which adds functionality)

## Checklist

- [x] I have run the [`pre-commit` checks](https://pre-commit.com/) with `./isaaclab.sh --format`
- [x] I have made corresponding changes to the documentation
- [x] My changes generate no new warnings
- [x] I have added tests that prove my fix is effective or that my feature works
- [ ] I have run all the tests with `./isaaclab.sh --test` and they pass
- [ ] I have updated the changelog and the corresponding version in the extension's `config/extension.toml` file
- [ ] I have added my name to the `CONTRIBUTORS.md` or my name already exists there

Improves Spot gait reward (#451)
Adds a new gait reward for Spot that selects the synchronized foot pairs by foot body name.

## Type of change

- New feature (non-breaking change which adds functionality)

## Checklist

- [x] I have run the [`pre-commit` checks](https://pre-commit.com/) with `./isaaclab.sh --format`
- [x] I have made corresponding changes to the documentation
- [x] My changes generate no new warnings
- [x] I have added tests that prove my fix is effective or that my feature works
- [ ] I have run all the tests with `./isaaclab.sh --test` and they pass
- [ ] I have updated the changelog and the corresponding version in the extension's `config/extension.toml` file
- [ ] I have added my name to the `CONTRIBUTORS.md` or my name already exists there
61c43084 · Farbod Farshidian · GitHub · 8816fb76 · 61c43084 · 61c43084
Unverified Commit 61c43084 authored Jun 06, 2024 by Farbod Farshidian Committed by GitHub Jun 06, 2024
3 changed files
--- a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/manager_based/locomotion/velocity/config/spot/agents/rsl_rl_cfg.py
+++ b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/manager_based/locomotion/velocity/config/spot/agents/rsl_rl_cfg.py
@@ -15,7 +15,7 @@ from omni.isaac.lab_tasks.utils.wrappers.rsl_rl import (
 @configclass
 class SpotFlatPPORunnerCfg(RslRlOnPolicyRunnerCfg):
    num_steps_per_env = 24
-    max_iterations = 35000
+    max_iterations = 20000
    save_interval = 50
    experiment_name = "spot_flat"
    empirical_normalization = False

--- a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/manager_based/locomotion/velocity/config/spot/flat_env_cfg.py
+++ b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/manager_based/locomotion/velocity/config/spot/flat_env_cfg.py
@@ -64,7 +64,7 @@ class SpotCommandsCfg:
        heading_command=False,
        debug_vis=True,
        ranges=mdp.UniformVelocityCommandCfg.Ranges(
-            lin_vel_x=(-2.0, 4.0), lin_vel_y=(-1.5, 1.5), ang_vel_z=(-2.0, 2.0)
+            lin_vel_x=(-2.0, 3.0), lin_vel_y=(-1.5, 1.5), ang_vel_z=(-2.0, 2.0)
        ),
    )

@@ -189,7 +189,12 @@ class SpotRewardsCfg:
    air_time = RewardTermCfg(
        func=spot_mdp.air_time_reward,
        weight=5.0,
-        params={"mode_time": 0.3, "sensor_cfg": SceneEntityCfg("contact_forces", body_names=".*_foot")},
+        params={
+            "mode_time": 0.3,
+            "velocity_threshold": 0.5,
+            "asset_cfg": SceneEntityCfg("robot"),
+            "sensor_cfg": SceneEntityCfg("contact_forces", body_names=".*_foot"),
+        },
    )
    base_angular_velocity = RewardTermCfg(
        func=spot_mdp.base_angular_velocity_reward,
@@ -212,9 +217,16 @@ class SpotRewardsCfg:
        },
    )
    gait = RewardTermCfg(
-        func=spot_mdp.gait_reward,
+        func=spot_mdp.GaitReward,
        weight=10.0,
-        params={"std": 0.1, "sensor_cfg": SceneEntityCfg("contact_forces", body_names=".*_foot")},
+        params={
+            "std": 0.1,
+            "max_err": 0.2,
+            "velocity_threshold": 0.5,
+            "synced_feet_pair_names": (("fl_foot", "hr_foot"), ("fr_foot", "hl_foot")),
+            "asset_cfg": SceneEntityCfg("robot"),
+            "sensor_cfg": SceneEntityCfg("contact_forces"),
+        },
    )

    # -- penalties
@@ -247,7 +259,11 @@ class SpotRewardsCfg:
    joint_pos = RewardTermCfg(
        func=spot_mdp.joint_position_penalty,
        weight=-0.7,
-        params={"asset_cfg": SceneEntityCfg("robot", joint_names=".*"), "stand_still_scale": 5.0},
+        params={
+            "asset_cfg": SceneEntityCfg("robot", joint_names=".*"),
+            "stand_still_scale": 5.0,
+            "velocity_threshold": 0.5,
+        },
    )
    joint_torques = RewardTermCfg(
        func=spot_mdp.joint_torques_penalty,

--- a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/manager_based/locomotion/velocity/config/spot/mdp/rewards.py
+++ b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/manager_based/locomotion/velocity/config/spot/mdp/rewards.py
@@ -10,19 +10,28 @@ import torch
 from typing import TYPE_CHECKING

 from omni.isaac.lab.assets import Articulation, RigidObject
-from omni.isaac.lab.managers import SceneEntityCfg
+from omni.isaac.lab.managers import ManagerTermBase, SceneEntityCfg
 from omni.isaac.lab.sensors import ContactSensor

 if TYPE_CHECKING:
    from omni.isaac.lab.envs import ManagerBasedRLEnv
+    from omni.isaac.lab.managers import RewardTermCfg
+

 # -- Task Rewards


-def air_time_reward(env: ManagerBasedRLEnv, sensor_cfg: SceneEntityCfg, mode_time: float) -> torch.Tensor:
+def air_time_reward(
+    env: ManagerBasedRLEnv,
+    asset_cfg: SceneEntityCfg,
+    sensor_cfg: SceneEntityCfg,
+    mode_time: float,
+    velocity_threshold: float,
+) -> torch.Tensor:
    """Reward longer feet air and contact time"""
    # extract the used quantities (to enable type-hinting)
    contact_sensor: ContactSensor = env.scene.sensors[sensor_cfg.name]
+    asset: Articulation = env.scene[asset_cfg.name]
    if contact_sensor.cfg.track_air_time is False:
        raise RuntimeError("Activate ContactSensor's track_air_time!")
    # compute the reward
@@ -33,7 +42,12 @@ def air_time_reward(env: ManagerBasedRLEnv, sensor_cfg: SceneEntityCfg, mode_tim
    t_min = torch.clip(t_max, max=mode_time)
    stance_cmd_reward = torch.clip(current_contact_time - current_air_time, -mode_time, mode_time)
    cmd = torch.norm(env.command_manager.get_command("base_velocity"), dim=1).unsqueeze(dim=1).expand(-1, 4)
-    reward = torch.where(cmd > 0.0, torch.where(t_max < mode_time, t_min, 0), stance_cmd_reward)
+    body_vel = torch.linalg.norm(asset.data.root_lin_vel_b[:, :2], dim=1).unsqueeze(dim=1).expand(-1, 4)
+    reward = torch.where(
+        torch.logical_or(cmd > 0.0, body_vel > velocity_threshold),
+        torch.where(t_max < mode_time, t_min, 0),
+        stance_cmd_reward,
+    )
    return torch.sum(reward, dim=1)


@@ -62,41 +76,94 @@ def base_linear_velocity_reward(
    return torch.exp(-lin_vel_error / std) * velocity_scaling_multiple


-# ! need to finalize logic, params, and docstring
-def gait_reward(env: ManagerBasedRLEnv, sensor_cfg: SceneEntityCfg, std: float) -> torch.Tensor:
-    """Penalize ..."""
-    # extract the used quantities (to enable type-hinting)
-    contact_sensor: ContactSensor = env.scene.sensors[sensor_cfg.name]
-    if contact_sensor.cfg.track_air_time is False:
-        raise RuntimeError("Activate ContactSensor's track_air_time!")
-    # compute the reward
-    air_time = contact_sensor.data.current_air_time[:, sensor_cfg.body_ids]
-    contact_time = contact_sensor.data.current_contact_time[:, sensor_cfg.body_ids]
-
-    max_err = 0.2
-    indices_0 = [0, 1]
-    indices_1 = [2, 3]
-    cmd = torch.norm(env.command_manager.get_command("base_velocity"), dim=1)
-    asym_err_0 = torch.clip(
-        torch.square(air_time[:, indices_0[0]] - contact_time[:, indices_0[1]]), max=max_err**2
-    ) + torch.clip(torch.square(contact_time[:, indices_0[0]] - air_time[:, indices_0[1]]), max=max_err**2)
-    asym_err_1 = torch.clip(
-        torch.square(air_time[:, indices_1[0]] - contact_time[:, indices_1[1]]), max=max_err**2
-    ) + torch.clip(torch.square(contact_time[:, indices_1[0]] - air_time[:, indices_1[1]]), max=max_err**2)
-    asym_err_2 = torch.clip(
-        torch.square(air_time[:, indices_0[0]] - contact_time[:, indices_1[0]]), max=max_err**2
-    ) + torch.clip(torch.square(contact_time[:, indices_0[0]] - air_time[:, indices_1[0]]), max=max_err**2)
-    asym_err_3 = torch.clip(
-        torch.square(air_time[:, indices_0[1]] - contact_time[:, indices_1[1]]), max=max_err**2
-    ) + torch.clip(torch.square(contact_time[:, indices_0[1]] - air_time[:, indices_1[1]]), max=max_err**2)
-    sym_err_0 = torch.clip(
-        torch.square(air_time[:, indices_0[0]] - air_time[:, indices_1[1]]), max=max_err**2
-    ) + torch.clip(torch.square(contact_time[:, indices_0[0]] - contact_time[:, indices_1[1]]), max=max_err**2)
-    sym_err_1 = torch.clip(
-        torch.square(air_time[:, indices_0[1]] - air_time[:, indices_1[0]]), max=max_err**2
-    ) + torch.clip(torch.square(contact_time[:, indices_0[1]] - contact_time[:, indices_1[0]]), max=max_err**2)
-    gait_err = asym_err_0 + asym_err_1 + sym_err_0 + sym_err_1 + asym_err_2 + asym_err_3
-    return torch.where(cmd > 0.0, torch.exp(-gait_err / std), 0.0)
+class GaitReward(ManagerTermBase):
+    """Gait enforcing reward term for quadrupeds.
+
+    This reward penalizes contact timing differences between selected foot pairs defined in :attr:`synced_feet_pair_names`
+    to bias the policy towards a desired gait, e.g. trotting, bounding, or pacing. Note that this reward is only for
+    quadrupedal gaits with two pairs of synchronized feet.
+    """
+
+    def __init__(self, cfg: RewardTermCfg, env: ManagerBasedRLEnv):
+        """Initialize the term.
+
+        Args:
+            cfg: The configuration of the reward.
+            env: The RL environment instance.
+        """
+        super().__init__(cfg, env)
+        self.std: float = cfg.params["std"]
+        self.max_err: float = cfg.params["max_err"]
+        self.velocity_threshold: float = cfg.params["velocity_threshold"]
+        self.contact_sensor: ContactSensor = env.scene.sensors[cfg.params["sensor_cfg"].name]
+        self.asset: Articulation = env.scene[cfg.params["asset_cfg"].name]
+        # match foot body names with corresponding foot body ids
+        synced_feet_pair_names = cfg.params["synced_feet_pair_names"]
+        if (
+            len(synced_feet_pair_names) != 2
+            or len(synced_feet_pair_names[0]) != 2
+            or len(synced_feet_pair_names[1]) != 2
+        ):
+            raise ValueError("This reward only supports gaits with two pairs of synchronized feet, like trotting.")
+        synced_feet_pair_0 = self.contact_sensor.find_bodies(synced_feet_pair_names[0])[0]
+        synced_feet_pair_1 = self.contact_sensor.find_bodies(synced_feet_pair_names[1])[0]
+        self.synced_feet_pairs = [synced_feet_pair_0, synced_feet_pair_1]
+
+    def __call__(
+        self,
+        env: ManagerBasedRLEnv,
+        std: float,
+        max_err: float,
+        velocity_threshold: float,
+        synced_feet_pair_names,
+        asset_cfg: SceneEntityCfg,
+        sensor_cfg: SceneEntityCfg,
+    ) -> torch.Tensor:
+        """Compute the reward.
+
+        This reward is the product of six terms: two of them reward the feet within each synced pair for being in
+        sync, and the other four reward the remaining cross-pair foot combinations for being out of sync.
+
+        Args:
+            env: The RL environment instance.
+        Returns:
+            The reward value.
+        """
+        # for synchronous feet, the contact (air) times of two feet should match
+        sync_reward_0 = self._sync_reward_func(self.synced_feet_pairs[0][0], self.synced_feet_pairs[0][1])
+        sync_reward_1 = self._sync_reward_func(self.synced_feet_pairs[1][0], self.synced_feet_pairs[1][1])
+        sync_reward = sync_reward_0 * sync_reward_1
+        # for asynchronous feet, the contact time of one foot should match the air time of the other one
+        async_reward_0 = self._async_reward_func(self.synced_feet_pairs[0][0], self.synced_feet_pairs[1][0])
+        async_reward_1 = self._async_reward_func(self.synced_feet_pairs[0][1], self.synced_feet_pairs[1][1])
+        async_reward_2 = self._async_reward_func(self.synced_feet_pairs[0][0], self.synced_feet_pairs[1][1])
+        async_reward_3 = self._async_reward_func(self.synced_feet_pairs[1][0], self.synced_feet_pairs[0][1])
+        async_reward = async_reward_0 * async_reward_1 * async_reward_2 * async_reward_3
+        # only enforce gait if cmd > 0 or the body moves faster than the velocity threshold
+        cmd = torch.norm(env.command_manager.get_command("base_velocity"), dim=1)
+        body_vel = torch.linalg.norm(self.asset.data.root_lin_vel_b[:, :2], dim=1)
+        return torch.where(
+            torch.logical_or(cmd > 0.0, body_vel > self.velocity_threshold), sync_reward * async_reward, 0.0
+        )
+
+    def _sync_reward_func(self, foot_0: int, foot_1: int) -> torch.Tensor:
+        """Reward synchronization of two feet."""
+        air_time = self.contact_sensor.data.current_air_time
+        contact_time = self.contact_sensor.data.current_contact_time
+        # penalize the differences in current air time and in current contact time between the two synced feet.
+        se_air = torch.clip(torch.square(air_time[:, foot_0] - air_time[:, foot_1]), max=self.max_err**2)
+        se_contact = torch.clip(torch.square(contact_time[:, foot_0] - contact_time[:, foot_1]), max=self.max_err**2)
+        return torch.exp(-(se_air + se_contact) / self.std)
+
+    def _async_reward_func(self, foot_0: int, foot_1: int) -> torch.Tensor:
+        """Reward anti-synchronization of two feet."""
+        air_time = self.contact_sensor.data.current_air_time
+        contact_time = self.contact_sensor.data.current_contact_time
+        # penalize the difference between opposing contact modes (air time of foot 0 to contact time of foot 1,
+        # and contact time of foot 0 to air time of foot 1) for feet that are not in sync with each other.
+        se_act_0 = torch.clip(torch.square(air_time[:, foot_0] - contact_time[:, foot_1]), max=self.max_err**2)
+        se_act_1 = torch.clip(torch.square(contact_time[:, foot_0] - air_time[:, foot_1]), max=self.max_err**2)
+        return torch.exp(-(se_act_0 + se_act_1) / self.std)


 def foot_clearance_reward(
@@ -176,14 +243,16 @@ def joint_acceleration_penalty(env: ManagerBasedRLEnv, asset_cfg: SceneEntityCfg
    return torch.linalg.norm((asset.data.joint_acc), dim=1)


-def joint_position_penalty(env: ManagerBasedRLEnv, asset_cfg: SceneEntityCfg, stand_still_scale: float) -> torch.Tensor:
+def joint_position_penalty(
+    env: ManagerBasedRLEnv, asset_cfg: SceneEntityCfg, stand_still_scale: float, velocity_threshold: float
+) -> torch.Tensor:
    """Penalize joint position error from default on the articulation."""
    # extract the used quantities (to enable type-hinting)
    asset: Articulation = env.scene[asset_cfg.name]
    cmd = torch.linalg.norm(env.command_manager.get_command("base_velocity"), dim=1)
    body_vel = torch.linalg.norm(asset.data.root_lin_vel_b[:, :2], dim=1)
    reward = torch.linalg.norm((asset.data.joint_pos - asset.data.default_joint_pos), dim=1)
-    return torch.where(torch.logical_or(cmd > 0.0, body_vel > 0.5), reward, stand_still_scale * reward)
+    return torch.where(torch.logical_or(cmd > 0.0, body_vel > velocity_threshold), reward, stand_still_scale * reward)


 def joint_torques_penalty(env: ManagerBasedRLEnv, asset_cfg: SceneEntityCfg) -> torch.Tensor: