Adds locomotion velocity task for the Cassie robot (#278)

# Description This MR adds the locomotion velocity task for the Cassie robot. ## Type of change - New feature (non-breaking change which adds functionality) ## Checklist - [x] I have run the [`pre-commit` checks](https://pre-commit.com/) with `./orbit.sh --format` - [x] I have made corresponding changes to the documentation - [x] My changes generate no new warnings - [ ] I have added tests that prove my fix is effective or that my feature works - [ ] I have updated the changelog and the corresponding version in the extension's `config/extension.toml` file - [x] I have added my name to the `CONTRIBUTORS.md` or my name already exists there

Adds locomotion velocity task for the Cassie robot (#278)
# Description This MR adds the locomotion velocity task for the Cassie robot. ## Type of change - New feature (non-breaking change which adds functionality) ## Checklist - [x] I have run the [`pre-commit` checks](https://pre-commit.com/) with `./orbit.sh --format` - [x] I have made corresponding changes to the documentation - [x] My changes generate no new warnings - [ ] I have added tests that prove my fix is effective or that my feature works - [ ] I have updated the changelog and the corresponding version in the extension's `config/extension.toml` file - [x] I have added my name to the `CONTRIBUTORS.md` or my name already exists there
b0c01d49 · David Hoeller · GitHub · 6ced8f93 · b0c01d49 · b0c01d49
Unverified Commit b0c01d49 authored Dec 18, 2023 by David Hoeller Committed by GitHub Dec 18, 2023
9 changed files
--- a/source/extensions/omni.isaac.orbit/omni/isaac/orbit/assets/config/cassie.py
+++ b/source/extensions/omni.isaac.orbit/omni/isaac/orbit/assets/config/cassie.py
+# Copyright (c) 2022-2023, The ORBIT Project Developers.
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+"""Configuration for Agility robots.
+The following configurations are available:
+* :obj:`CASSIE_CFG`: Agility Cassie robot with simple PD controller for the legs
+Reference: https://github.com/UMich-BipedLab/Cassie_Model/blob/master/urdf/cassie.urdf
+"""
+from __future__ import annotations
+import omni.isaac.orbit.sim as sim_utils
+from omni.isaac.orbit.actuators import ImplicitActuatorCfg
+from omni.isaac.orbit.utils.assets import ISAAC_ORBIT_NUCLEUS_DIR
+from ..articulation import ArticulationCfg
+##
+# Configuration
+##
+CASSIE_CFG = ArticulationCfg(
+    spawn=sim_utils.UsdFileCfg(
+        usd_path=f"{ISAAC_ORBIT_NUCLEUS_DIR}/Robots/Agility/Cassie/cassie.usd",
+        activate_contact_sensors=True,
+        rigid_props=sim_utils.RigidBodyPropertiesCfg(
+            disable_gravity=False,
+            retain_accelerations=False,
+            linear_damping=0.0,
+            angular_damping=0.0,
+            max_linear_velocity=1000.0,
+            max_angular_velocity=1000.0,
+            max_depenetration_velocity=1.0,
+        ),
+        articulation_props=sim_utils.ArticulationRootPropertiesCfg(
+            enabled_self_collisions=True, solver_position_iteration_count=4, solver_velocity_iteration_count=0
+        ),
+    ),
+    init_state=ArticulationCfg.InitialStateCfg(
+        pos=(0.0, 0.0, 0.9),
+        joint_pos={
+            "hip_abduction_left": 0.1,
+            "hip_rotation_left": 0.0,
+            "hip_flexion_left": 1.0,
+            "thigh_joint_left": -1.8,
+            "ankle_joint_left": 1.57,
+            "toe_joint_left": -1.57,
+            "hip_abduction_right": -0.1,
+            "hip_rotation_right": 0.0,
+            "hip_flexion_right": 1.0,
+            "thigh_joint_right": -1.8,
+            "ankle_joint_right": 1.57,
+            "toe_joint_right": -1.57,
+        },
+        joint_vel={".*": 0.0},
+    ),
+    soft_joint_pos_limit_factor=0.9,
+    actuators={
+        "legs": ImplicitActuatorCfg(
+            joint_names_expr=["hip_.*", "thigh_.*", "ankle_.*"],
+            effort_limit=200.0,
+            velocity_limit=10.0,
+            stiffness={
+                "hip_abduction.*": 100.0,
+                "hip_rotation.*": 100.0,
+                "hip_flexion.*": 200.0,
+                "thigh_joint.*": 200.0,
+                "ankle_joint.*": 200.0,
+            },
+            damping={
+                "hip_abduction.*": 3.0,
+                "hip_rotation.*": 3.0,
+                "hip_flexion.*": 6.0,
+                "thigh_joint.*": 6.0,
+                "ankle_joint.*": 6.0,
+            },
+        ),
+        "toes": ImplicitActuatorCfg(
+            joint_names_expr=["toe_.*"],
+            effort_limit=20.0,
+            velocity_limit=10.0,
+            stiffness={
+                "toe_joint.*": 20.0,
+            },
+            damping={
+                "toe_joint.*": 1.0,
+            },
+        ),
+    },
+)
--- a/source/extensions/omni.isaac.orbit/omni/isaac/orbit/envs/mdp/rewards.py
+++ b/source/extensions/omni.isaac.orbit/omni/isaac/orbit/envs/mdp/rewards.py
@@ -118,6 +118,15 @@ def joint_acc_l2(env: RLTaskEnv, asset_cfg: SceneEntityCfg = SceneEntityCfg("rob
    return torch.sum(torch.square(asset.data.joint_acc), dim=1)
+def joint_deviation_l1(env, asset_cfg: SceneEntityCfg = SceneEntityCfg("robot")) -> torch.Tensor:
+    """Penalize joint positions that deviate from the default one."""
+    # extract the used quantities (to enable type-hinting)
+    asset: Articulation = env.scene[asset_cfg.name]
+    # compute out of limits constraints
+    angle = asset.data.joint_pos[:, asset_cfg.joint_ids] - asset.data.default_joint_pos[:, asset_cfg.joint_ids]
+    return torch.sum(torch.abs(angle), dim=1)
 def joint_pos_limits(env: RLTaskEnv, asset_cfg: SceneEntityCfg = SceneEntityCfg("robot")) -> torch.Tensor:
    """Penalize joint positions if they cross the soft limits.
@@ -126,8 +135,12 @@ def joint_pos_limits(env: RLTaskEnv, asset_cfg: SceneEntityCfg = SceneEntityCfg(
    # extract the used quantities (to enable type-hinting)
    asset: Articulation = env.scene[asset_cfg.name]
    # compute out of limits constraints
-    out_of_limits = -(asset.data.joint_pos - asset.data.soft_joint_pos_limits[..., 0]).clip(max=0.0)
+    out_of_limits = -(
-    out_of_limits += (asset.data.joint_pos - asset.data.soft_joint_pos_limits[..., 1]).clip(min=0.0)
+        asset.data.joint_pos[:, asset_cfg.joint_ids] - asset.data.soft_joint_pos_limits[:, asset_cfg.joint_ids, 0]
+    ).clip(max=0.0)
+    out_of_limits += (
+        asset.data.joint_pos[:, asset_cfg.joint_ids] - asset.data.soft_joint_pos_limits[:, asset_cfg.joint_ids, 1]
+    ).clip(min=0.0)
    return torch.sum(out_of_limits, dim=1)

--- a/source/extensions/omni.isaac.orbit_tasks/omni/isaac/orbit_tasks/locomotion/velocity/config/cassie/__init__.py
+++ b/source/extensions/omni.isaac.orbit_tasks/omni/isaac/orbit_tasks/locomotion/velocity/config/cassie/__init__.py
+# Copyright (c) 2022-2023, The ORBIT Project Developers.
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+import gymnasium as gym
+from . import agents, flat_env_cfg, rough_env_cfg
+##
+# Register Gym environments.
+##
+gym.register(
+    id="Isaac-Velocity-Flat-Cassie-v0",
+    entry_point="omni.isaac.orbit.envs:RLTaskEnv",
+    disable_env_checker=True,
+    kwargs={
+        "env_cfg_entry_point": flat_env_cfg.CassieFlatEnvCfg,
+        "rsl_rl_cfg_entry_point": agents.rsl_rl_cfg.CassieFlatPPORunnerCfg,
+    },
+)
+gym.register(
+    id="Isaac-Velocity-Flat-Cassie-Play-v0",
+    entry_point="omni.isaac.orbit.envs:RLTaskEnv",
+    disable_env_checker=True,
+    kwargs={
+        "env_cfg_entry_point": flat_env_cfg.CassieFlatEnvCfg_PLAY,
+        "rsl_rl_cfg_entry_point": agents.rsl_rl_cfg.CassieFlatPPORunnerCfg,
+    },
+)
+gym.register(
+    id="Isaac-Velocity-Rough-Cassie-v0",
+    entry_point="omni.isaac.orbit.envs:RLTaskEnv",
+    disable_env_checker=True,
+    kwargs={
+        "env_cfg_entry_point": rough_env_cfg.CassieRoughEnvCfg,
+        "rsl_rl_cfg_entry_point": agents.rsl_rl_cfg.CassieRoughPPORunnerCfg,
+    },
+)
+gym.register(
+    id="Isaac-Velocity-Rough-Cassie-Play-v0",
+    entry_point="omni.isaac.orbit.envs:RLTaskEnv",
+    disable_env_checker=True,
+    kwargs={
+        "env_cfg_entry_point": rough_env_cfg.CassieRoughEnvCfg_PLAY,
+        "rsl_rl_cfg_entry_point": agents.rsl_rl_cfg.CassieRoughPPORunnerCfg,
+    },
+)
--- a/source/extensions/omni.isaac.orbit_tasks/omni/isaac/orbit_tasks/locomotion/velocity/config/cassie/agents/__init__.py
+++ b/source/extensions/omni.isaac.orbit_tasks/omni/isaac/orbit_tasks/locomotion/velocity/config/cassie/agents/__init__.py
+# Copyright (c) 2022-2023, The ORBIT Project Developers.
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+from . import rsl_rl_cfg  # noqa: F401, F403
--- a/source/extensions/omni.isaac.orbit_tasks/omni/isaac/orbit_tasks/locomotion/velocity/config/cassie/agents/rsl_rl_cfg.py
+++ b/source/extensions/omni.isaac.orbit_tasks/omni/isaac/orbit_tasks/locomotion/velocity/config/cassie/agents/rsl_rl_cfg.py
+# Copyright (c) 2022-2023, The ORBIT Project Developers.
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+from omni.isaac.orbit.utils import configclass
+from omni.isaac.orbit_tasks.utils.wrappers.rsl_rl import (
+    RslRlOnPolicyRunnerCfg,
+    RslRlPpoActorCriticCfg,
+    RslRlPpoAlgorithmCfg,
+)
+@configclass
+class CassieRoughPPORunnerCfg(RslRlOnPolicyRunnerCfg):
+    num_steps_per_env = 24
+    max_iterations = 1500
+    save_interval = 50
+    experiment_name = "cassie_rough"
+    empirical_normalization = False
+    policy = RslRlPpoActorCriticCfg(
+        init_noise_std=1.0,
+        actor_hidden_dims=[512, 256, 128],
+        critic_hidden_dims=[512, 256, 128],
+        activation="elu",
+    )
+    algorithm = RslRlPpoAlgorithmCfg(
+        value_loss_coef=1.0,
+        use_clipped_value_loss=True,
+        clip_param=0.2,
+        entropy_coef=0.01,
+        num_learning_epochs=5,
+        num_mini_batches=4,
+        learning_rate=1.0e-3,
+        schedule="adaptive",
+        gamma=0.99,
+        lam=0.95,
+        desired_kl=0.01,
+        max_grad_norm=1.0,
+    )
+@configclass
+class CassieFlatPPORunnerCfg(CassieRoughPPORunnerCfg):
+    def __post_init__(self):
+        super().__post_init__()
+        self.max_iterations = 1000
+        self.experiment_name = "cassie_flat"
+        self.policy.actor_hidden_dims = [128, 128, 128]
+        self.policy.critic_hidden_dims = [128, 128, 128]
--- a/source/extensions/omni.isaac.orbit_tasks/omni/isaac/orbit_tasks/locomotion/velocity/config/cassie/flat_env_cfg.py
+++ b/source/extensions/omni.isaac.orbit_tasks/omni/isaac/orbit_tasks/locomotion/velocity/config/cassie/flat_env_cfg.py
+# Copyright (c) 2022-2023, The ORBIT Project Developers.
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+from omni.isaac.orbit.utils import configclass
+from .rough_env_cfg import CassieRoughEnvCfg
+@configclass
+class CassieFlatEnvCfg(CassieRoughEnvCfg):
+    def __post_init__(self):
+        # post init of parent
+        super().__post_init__()
+        # rewards
+        self.rewards.flat_orientation_l2.weight = -2.5
+        self.rewards.feet_air_time.weight = 20.0
+        self.rewards.joint_deviation_hip.params["asset_cfg"].joint_names = ["hip_rotation_.*"]
+        # change terrain to flat
+        self.scene.terrain.terrain_type = "plane"
+        self.scene.terrain.terrain_generator = None
+        # no height scan
+        self.scene.height_scanner = None
+        self.observations.policy.height_scan = None
+        # no terrain curriculum
+        self.curriculum.terrain_levels = None
+class CassieFlatEnvCfg_PLAY(CassieFlatEnvCfg):
+    def __post_init__(self) -> None:
+        # post init of parent
+        super().__post_init__()
+        # make a smaller scene for play
+        self.scene.num_envs = 50
+        self.scene.env_spacing = 2.5
+        # disable randomization for play
+        self.observations.policy.enable_corruption = False
--- a/source/extensions/omni.isaac.orbit_tasks/omni/isaac/orbit_tasks/locomotion/velocity/config/cassie/rough_env_cfg.py
+++ b/source/extensions/omni.isaac.orbit_tasks/omni/isaac/orbit_tasks/locomotion/velocity/config/cassie/rough_env_cfg.py
+# Copyright (c) 2022-2023, The ORBIT Project Developers.
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+from omni.isaac.orbit.managers import RewardTermCfg as RewTerm
+from omni.isaac.orbit.managers import SceneEntityCfg
+from omni.isaac.orbit.utils import configclass
+import omni.isaac.orbit_tasks.locomotion.velocity.mdp as mdp
+from omni.isaac.orbit_tasks.locomotion.velocity.velocity_env_cfg import LocomotionVelocityRoughEnvCfg, RewardsCfg
+##
+# Pre-defined configs
+##
+from omni.isaac.orbit.assets.config.cassie import CASSIE_CFG  # isort: skip
+@configclass
+class CassieRewardsCfg(RewardsCfg):
+    termination_penalty = RewTerm(func=mdp.is_terminated, weight=-200.0)
+    feet_air_time = RewTerm(
+        func=mdp.feet_air_time_positive_biped,
+        weight=10.0,
+        params={
+            "sensor_cfg": SceneEntityCfg("contact_forces", body_names=".*toe"),
+            "command_name": "base_velocity",
+            "threshold": 0.3,
+        },
+    )
+    joint_deviation_hip = RewTerm(
+        func=mdp.joint_deviation_l1,
+        weight=-0.2,
+        params={"asset_cfg": SceneEntityCfg("robot", joint_names=["hip_abduction_.*", "hip_rotation_.*"])},
+    )
+    joint_deviation_toes = RewTerm(
+        func=mdp.joint_deviation_l1,
+        weight=-0.2,
+        params={"asset_cfg": SceneEntityCfg("robot", joint_names=["toe_joint_.*"])},
+    )
+    # penalize toe joint limits
+    dof_pos_limits = RewTerm(
+        func=mdp.joint_pos_limits,
+        weight=-1.0,
+        params={"asset_cfg": SceneEntityCfg("robot", joint_names="toe_joint_.*")},
+    )
+@configclass
+class CassieRoughEnvCfg(LocomotionVelocityRoughEnvCfg):
+    """Cassie rough environment configuration."""
+    rewards: CassieRewardsCfg = CassieRewardsCfg()
+    def __post_init__(self):
+        super().__post_init__()
+        # scene
+        self.scene.robot = CASSIE_CFG.replace(prim_path="{ENV_REGEX_NS}/Robot")
+        self.scene.height_scanner.prim_path = "{ENV_REGEX_NS}/Robot/pelvis"
+        # actions
+        self.actions.joint_pos.scale = 0.5
+        # randomizations
+        self.randomization.push_robot = None
+        self.randomization.add_base_mass = None
+        self.randomization.reset_robot_joints.params["position_range"] = (1.0, 1.0)
+        self.randomization.base_external_force_torque.params["asset_cfg"].body_names = [".*pelvis"]
+        self.randomization.reset_base.params = {
+            "pose_range": {"x": (-0.5, 0.5), "y": (-0.5, 0.5), "yaw": (-3.14, 3.14)},
+            "velocity_range": {
+                "x": (0.0, 0.0),
+                "y": (0.0, 0.0),
+                "z": (0.0, 0.0),
+                "roll": (0.0, 0.0),
+                "pitch": (0.0, 0.0),
+                "yaw": (0.0, 0.0),
+            },
+        }
+        # terminations
+        self.terminations.base_contact.params["sensor_cfg"].body_names = [".*pelvis"]
+        # rewards
+        self.rewards.undesired_contacts = None
+        self.rewards.dof_torques_l2.weight = -5.0e-6
+        self.rewards.track_lin_vel_xy_exp.weight = 2.0
+        self.rewards.track_ang_vel_z_exp.weight = 1.0
+        self.rewards.action_rate_l2.weight *= 1.5
+        self.rewards.dof_acc_l2.weight *= 1.5
+@configclass
+class CassieRoughEnvCfg_PLAY(CassieRoughEnvCfg):
+    def __post_init__(self):
+        # post init of parent
+        super().__post_init__()
+        # make a smaller scene for play
+        self.scene.num_envs = 50
+        self.scene.env_spacing = 2.5
+        # spawn the robot randomly in the grid (instead of their terrain levels)
+        self.scene.terrain.max_init_terrain_level = None
+        # reduce the number of terrains to save memory
+        if self.scene.terrain.terrain_generator is not None:
+            self.scene.terrain.terrain_generator.num_rows = 5
+            self.scene.terrain.terrain_generator.num_cols = 5
+            self.scene.terrain.terrain_generator.curriculum = False
+        self.commands.base_velocity.ranges.lin_vel_x = (0.7, 1.0)
+        self.commands.base_velocity.ranges.lin_vel_y = (0.0, 0.0)
+        self.commands.base_velocity.ranges.heading = (0.0, 0.0)
+        # disable randomization for play
+        self.observations.policy.enable_corruption = False
--- a/source/extensions/omni.isaac.orbit_tasks/omni/isaac/orbit_tasks/locomotion/velocity/mdp/rewards.py
+++ b/source/extensions/omni.isaac.orbit_tasks/omni/isaac/orbit_tasks/locomotion/velocity/mdp/rewards.py
@@ -33,3 +33,22 @@ def feet_air_time(env: RLTaskEnv, command_name: str, sensor_cfg: SceneEntityCfg,
    # no reward for zero command
    reward *= torch.norm(env.command_manager.get_command(command_name)[:, :2], dim=1) > 0.1
    return reward
+def feet_air_time_positive_biped(env, command_name: str, threshold: float, sensor_cfg: SceneEntityCfg) -> torch.Tensor:
+    """Reward long steps taken by the feet for bipeds.
+    This function rewards the agent for taking steps up to a specified threshold and also keep one foot at
+    a time in the air.
+    If the commands are small (i.e. the agent is not supposed to take a step), then the reward is zero.
+    """
+    contact_sensor = env.scene.sensors[sensor_cfg.name]
+    # compute the reward
+    last_air_time = contact_sensor.data.last_air_time[:, sensor_cfg.body_ids]
+    was_in_air = (last_air_time > 0.0).float()
+    num_feet_contact = torch.sum(was_in_air, dim=1).int()
+    reward = torch.where(num_feet_contact == 1, torch.sum(last_air_time.clamp_max_(threshold), dim=1), 0.0)
+    # no reward for zero command
+    reward *= torch.norm(env.command_manager.get_command(command_name)[:, :2], dim=1) > 0.1
+    return reward
--- a/source/standalone/demos/bipeds.py
+++ b/source/standalone/demos/bipeds.py
+# Copyright (c) 2022-2023, The ORBIT Project Developers.
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+"""
+This script demonstrates how to simulate a bipedal robot.
+"""
+from __future__ import annotations
+"""Launch Isaac Sim Simulator first."""
+import argparse
+from omni.isaac.orbit.app import AppLauncher
+# add argparse arguments
+parser = argparse.ArgumentParser(description="This script demonstrates how to simulate a bipedal robot.")
+# append AppLauncher cli args
+AppLauncher.add_app_launcher_args(parser)
+# parse the arguments
+args_cli = parser.parse_args()
+# launch omniverse app
+app_launcher = AppLauncher(args_cli)
+simulation_app = app_launcher.app
+"""Rest everything follows."""
+import traceback
+import carb
+import omni.isaac.orbit.sim as sim_utils
+from omni.isaac.orbit.assets import Articulation
+from omni.isaac.orbit.assets.config.cassie import CASSIE_CFG
+from omni.isaac.orbit.sim import SimulationContext
+def main():
+    """Main function."""
+    # Load kit helper
+    sim = SimulationContext(
+        sim_utils.SimulationCfg(device="cpu", use_gpu_pipeline=False, dt=0.005, physx=sim_utils.PhysxCfg(use_gpu=False))
+    )
+    # Set main camera
+    sim.set_camera_view(eye=[3.5, 3.5, 3.5], target=[0.0, 0.0, 0.0])
+    # Spawn things into stage
+    # Ground-plane
+    cfg = sim_utils.GroundPlaneCfg()
+    cfg.func("/World/defaultGroundPlane", cfg)
+    # Lights
+    cfg = sim_utils.DistantLightCfg(intensity=3000.0, color=(0.75, 0.75, 0.75))
+    cfg.func("/World/Light", cfg)
+    # Robots
+    robot_cfg = CASSIE_CFG
+    robot_cfg.spawn.func("/World/Cassie/Robot_1", robot_cfg.spawn, translation=(1.5, 0.5, 0.42))
+    # create handles for the robots
+    robots = Articulation(robot_cfg.replace(prim_path="/World/Cassie/Robot.*"))
+    # Play the simulator
+    sim.reset()
+    # Now we are ready!
+    print("[INFO]: Setup complete...")
+    # Define simulation stepping
+    sim_dt = sim.get_physics_dt()
+    sim_time = 0.0
+    count = 0
+    # Simulate physics
+    while simulation_app.is_running():
+        # reset
+        if count % 200 == 0:
+            # reset counters
+            sim_time = 0.0
+            count = 0
+            # reset dof state
+            joint_pos, joint_vel = robots.data.default_joint_pos, robots.data.default_joint_vel
+            robots.write_joint_state_to_sim(joint_pos, joint_vel)
+            robots.write_root_pose_to_sim(robots.data.default_root_state[:, :7])
+            robots.write_root_velocity_to_sim(robots.data.default_root_state[:, 7:])
+            robots.reset()
+            # reset command
+            print(">>>>>>>> Reset!")
+        # apply action to the robot
+        robots.set_joint_position_target(robots.data.default_joint_pos.clone())
+        robots.write_data_to_sim()
+        # perform step
+        sim.step()
+        # update sim-time
+        sim_time += sim_dt
+        count += 1
+        # update buffers
+        robots.update(sim_dt)
+if __name__ == "__main__":
+    try:
+        # run the main execution
+        main()
+    except Exception as err:
+        carb.log_error(err)
+        carb.log_error(traceback.format_exc())
+        raise
+    finally:
+        # close sim app
+        simulation_app.close()