Unverified Commit 55745eb3 authored by Farbod Farshidian's avatar Farbod Farshidian Committed by GitHub

Adds the Spot locomotion environment (#450)

Adds the training task for the Spot robot.  


## Type of change

- New feature

## Checklist

- [x] I have run the [`pre-commit` checks](https://pre-commit.com/) with
`./isaaclab.sh --format`
- [x] I have made corresponding changes to the documentation
- [x] My changes generate no new warnings
- [x] I have added tests that prove my fix is effective or that my
feature works
- [x] I have run all the tests with `./isaaclab.sh --test` and they pass
- [ ] I have updated the changelog and the corresponding version in the
extension's `config/extension.toml` file
- [x] I have added my name to the `CONTRIBUTORS.md` or my name already
exists there
parent 4ed26b9c
......@@ -204,7 +204,6 @@ html_css_files = ["custom.css"]
html_theme_options = {
"collapse_navigation": True,
"repository_url": "https://github.com/isaac-sim/IsaacLab",
"announcement": "We have now released v0.3.0! Please use the latest version for the best experience.",
"use_repository_button": True,
"use_issues_button": True,
"use_edit_page_button": True,
......
......@@ -111,39 +111,41 @@ Environments based on legged locomotion tasks.
.. table::
:widths: 33 37 30
+------------------------------+----------------------------------------------+-------------------------------------------------------------------------+
+------------------------------+----------------------------------------------+------------------------------------------------------------------------------+
| World | Environment ID | Description |
+==============================+==============================================+=========================================================================+
+==============================+==============================================+==============================================================================+
| |velocity-flat-anymal-b| | |velocity-flat-anymal-b-link| | Track a velocity command on flat terrain with the Anymal B robot |
+------------------------------+----------------------------------------------+-------------------------------------------------------------------------+
+------------------------------+----------------------------------------------+------------------------------------------------------------------------------+
| |velocity-rough-anymal-b| | |velocity-rough-anymal-b-link| | Track a velocity command on rough terrain with the Anymal B robot |
+------------------------------+----------------------------------------------+-------------------------------------------------------------------------+
+------------------------------+----------------------------------------------+------------------------------------------------------------------------------+
| |velocity-flat-anymal-c| | | |velocity-flat-anymal-c-link| | Track a velocity command on flat terrain with the Anymal C robot |
| | | |velocity-flat-anymal-c-direct-link| | |
+------------------------------+----------------------------------------------+-------------------------------------------------------------------------+
+------------------------------+----------------------------------------------+------------------------------------------------------------------------------+
| |velocity-rough-anymal-c| | | |velocity-rough-anymal-c-link| | Track a velocity command on rough terrain with the Anymal C robot |
| | | |velocity-rough-anymal-c-direct-link| | |
+------------------------------+----------------------------------------------+-------------------------------------------------------------------------+
+------------------------------+----------------------------------------------+------------------------------------------------------------------------------+
| |velocity-flat-anymal-d| | |velocity-flat-anymal-d-link| | Track a velocity command on flat terrain with the Anymal D robot |
+------------------------------+----------------------------------------------+-------------------------------------------------------------------------+
+------------------------------+----------------------------------------------+------------------------------------------------------------------------------+
| |velocity-rough-anymal-d| | |velocity-rough-anymal-d-link| | Track a velocity command on rough terrain with the Anymal D robot |
+------------------------------+----------------------------------------------+-------------------------------------------------------------------------+
+------------------------------+----------------------------------------------+------------------------------------------------------------------------------+
| |velocity-flat-unitree-a1| | |velocity-flat-unitree-a1-link| | Track a velocity command on flat terrain with the Unitree A1 robot |
+------------------------------+----------------------------------------------+-------------------------------------------------------------------------+
+------------------------------+----------------------------------------------+------------------------------------------------------------------------------+
| |velocity-rough-unitree-a1| | |velocity-rough-unitree-a1-link| | Track a velocity command on rough terrain with the Unitree A1 robot |
+------------------------------+----------------------------------------------+-------------------------------------------------------------------------+
+------------------------------+----------------------------------------------+------------------------------------------------------------------------------+
| |velocity-flat-unitree-go1| | |velocity-flat-unitree-go1-link| | Track a velocity command on flat terrain with the Unitree Go1 robot |
+------------------------------+----------------------------------------------+-------------------------------------------------------------------------+
+------------------------------+----------------------------------------------+------------------------------------------------------------------------------+
| |velocity-rough-unitree-go1| | |velocity-rough-unitree-go1-link| | Track a velocity command on rough terrain with the Unitree Go1 robot |
+------------------------------+----------------------------------------------+-------------------------------------------------------------------------+
+------------------------------+----------------------------------------------+------------------------------------------------------------------------------+
| |velocity-flat-unitree-go2| | |velocity-flat-unitree-go2-link| | Track a velocity command on flat terrain with the Unitree Go2 robot |
+------------------------------+----------------------------------------------+-------------------------------------------------------------------------+
+------------------------------+----------------------------------------------+------------------------------------------------------------------------------+
| |velocity-rough-unitree-go2| | |velocity-rough-unitree-go2-link| | Track a velocity command on rough terrain with the Unitree Go2 robot |
+------------------------------+----------------------------------------------+-------------------------------------------------------------------------+
+------------------------------+----------------------------------------------+------------------------------------------------------------------------------+
| |velocity-flat-spot| | |velocity-flat-spot-link| | Track a velocity command on flat terrain with the Boston Dynamics Spot robot |
+------------------------------+----------------------------------------------+------------------------------------------------------------------------------+
| |velocity-flat-h1| | |velocity-flat-h1-link| | Track a velocity command on flat terrain with the Unitree H1 robot |
+------------------------------+----------------------------------------------+-------------------------------------------------------------------------+
+------------------------------+----------------------------------------------+------------------------------------------------------------------------------+
| |velocity-rough-h1| | |velocity-rough-h1-link| | Track a velocity command on rough terrain with the Unitree H1 robot |
+------------------------------+----------------------------------------------+-------------------------------------------------------------------------+
+------------------------------+----------------------------------------------+------------------------------------------------------------------------------+
.. |velocity-flat-anymal-b-link| replace:: `Isaac-Velocity-Flat-Anymal-B-v0 <https://github.com/isaac-sim/IsaacLab/blob/main/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/manager_based/locomotion/velocity/config/anymal_b/flat_env_cfg.py>`__
.. |velocity-rough-anymal-b-link| replace:: `Isaac-Velocity-Rough-Anymal-B-v0 <https://github.com/isaac-sim/IsaacLab/blob/main/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/manager_based/locomotion/velocity/config/anymal_b/rough_env_cfg.py>`__
......@@ -166,6 +168,8 @@ Environments based on legged locomotion tasks.
.. |velocity-flat-unitree-go2-link| replace:: `Isaac-Velocity-Flat-Unitree-Go2-v0 <https://github.com/isaac-sim/IsaacLab/blob/main/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/manager_based/locomotion/velocity/config/unitree_go2/flat_env_cfg.py>`__
.. |velocity-rough-unitree-go2-link| replace:: `Isaac-Velocity-Rough-Unitree-Go2-v0 <https://github.com/isaac-sim/IsaacLab/blob/main/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/manager_based/locomotion/velocity/config/unitree_go2/rough_env_cfg.py>`__
.. |velocity-flat-spot-link| replace:: `Isaac-Velocity-Flat-Spot-v0 <https://github.com/isaac-sim/IsaacLab/blob/main/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/manager_based/locomotion/velocity/config/spot/flat_env_cfg.py>`__
.. |velocity-flat-h1-link| replace:: `Isaac-Velocity-Flat-H1-v0 <https://github.com/isaac-sim/IsaacLab/blob/main/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/manager_based/locomotion/velocity/config/h1/flat_env_cfg.py>`__
.. |velocity-rough-h1-link| replace:: `Isaac-Velocity-Rough-H1-v0 <https://github.com/isaac-sim/IsaacLab/blob/main/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/manager_based/locomotion/velocity/config/h1/rough_env_cfg.py>`__
......@@ -182,6 +186,7 @@ Environments based on legged locomotion tasks.
.. |velocity-rough-unitree-go1| image:: ../_static/tasks/locomotion/go1_rough.jpg
.. |velocity-flat-unitree-go2| image:: ../_static/tasks/locomotion/go2_flat.jpg
.. |velocity-rough-unitree-go2| image:: ../_static/tasks/locomotion/go2_rough.jpg
.. |velocity-flat-spot| image:: ../_static/tasks/locomotion/spot_flat.jpg
.. |velocity-flat-h1| image:: ../_static/tasks/locomotion/h1_flat.jpg
.. |velocity-rough-h1| image:: ../_static/tasks/locomotion/h1_rough.jpg
......
......@@ -28,8 +28,10 @@ from .actuator_cfg import (
ActuatorNetLSTMCfg,
ActuatorNetMLPCfg,
DCMotorCfg,
DelayedPDActuatorCfg,
IdealPDActuatorCfg,
ImplicitActuatorCfg,
RemotizedPDActuatorCfg,
)
from .actuator_net import ActuatorNetLSTM, ActuatorNetMLP
from .actuator_pd import DCMotor, IdealPDActuator, ImplicitActuator
from .actuator_pd import DCMotor, DelayedPDActuator, IdealPDActuator, ImplicitActuator, RemotizedPDActuator
......@@ -3,6 +3,7 @@
#
# SPDX-License-Identifier: BSD-3-Clause
import torch
from collections.abc import Iterable
from dataclasses import MISSING
from typing import Literal
......@@ -153,3 +154,42 @@ class ActuatorNetMLPCfg(DCMotorCfg):
The index *0* corresponds to current time-step, while *n* corresponds to n-th
time-step in the past. The allocated history length is `max(input_idx) + 1`.
"""
@configclass
class DelayedPDActuatorCfg(IdealPDActuatorCfg):
"""Configuration for a delayed PD actuator."""
class_type: type = actuator_pd.DelayedPDActuator
min_num_time_lags: int = 0
"""Minimum number of physics time-steps that the actuator command may be delayed."""
max_num_time_lags: int = 0
"""Maximum number of physics time-steps that the actuator command may be delayed."""
num_time_lags: int = 0
"""The number of physics time-steps that the actuator command will be delayed.
Note:
This values cannot be greater than `max_num_time_lags`.
"""
@configclass
class RemotizedPDActuatorCfg(DelayedPDActuatorCfg):
"""Configuration for a remotized PD actuator.
Note:
The torque output limits for this actuator is derived from a linear interpolation of a lookup table
in :attr:`joint_parameter_lookup` describing the relationship between joint angles and the output torques.
"""
class_type: type = actuator_pd.RemotizedPDActuator
joint_parameter_lookup: torch.Tensor = MISSING
"""Joint parameter lookup table. Shape is (num_lookup_points, 3).
The tensor describes relationship between the joint angle (rad), the transmission ratio (in/out),
and the output torque (N*m).
"""
......@@ -11,10 +11,18 @@ from typing import TYPE_CHECKING
from omni.isaac.core.utils.types import ArticulationActions
from omni.isaac.lab.utils import DelayBuffer, LinearInterpolation
from .actuator_base import ActuatorBase
if TYPE_CHECKING:
from .actuator_cfg import DCMotorCfg, IdealPDActuatorCfg, ImplicitActuatorCfg
from .actuator_cfg import (
DCMotorCfg,
DelayedPDActuatorCfg,
IdealPDActuatorCfg,
ImplicitActuatorCfg,
RemotizedPDActuatorCfg,
)
"""
......@@ -210,3 +218,138 @@ class DCMotor(IdealPDActuator):
# clip the torques based on the motor limits
return torch.clip(effort, min=min_effort, max=max_effort)
class DelayedPDActuator(IdealPDActuator):
"""Ideal PD actuator with delayed data.
The DelayedPDActuator has configurable minimum and maximum time lag values, which are used to initialize a
DelayBuffer to hold a queue of pending actuator commands. On reset, a value time_lags will be randomly sampled
from the min and max time lag bounds. At every physics step, the most recent actuation value is pushed to the
DelayBuffer, but the final actuation value applied to simulation will be `time_lags` physics steps in the past.
"""
def __init__(
self,
cfg: DelayedPDActuatorCfg,
joint_names: list[str],
joint_ids: Sequence[int],
num_envs: int,
device: str,
stiffness: torch.Tensor | float = 0.0,
damping: torch.Tensor | float = 0.0,
armature: torch.Tensor | float = 0.0,
friction: torch.Tensor | float = 0.0,
effort_limit: torch.Tensor | float = torch.inf,
velocity_limit: torch.Tensor | float = torch.inf,
):
super().__init__(
cfg,
joint_names,
joint_ids,
num_envs,
device,
stiffness,
damping,
armature,
friction,
effort_limit,
velocity_limit,
)
# instantiate the delay buffers
self.positions_delay_buffer = DelayBuffer(cfg.max_num_time_lags, num_envs=num_envs, device=device)
self.velocities_delay_buffer = DelayBuffer(cfg.max_num_time_lags, num_envs=num_envs, device=device)
self.efforts_delay_buffer = DelayBuffer(cfg.max_num_time_lags, num_envs=num_envs, device=device)
# all of the envs
self._ALL_INDICES = torch.arange(num_envs, dtype=torch.long, device=device)
def reset(self, env_ids: Sequence[int]):
super().reset(env_ids)
# number of environments (since env_ids can be a slice)
env_size = self._ALL_INDICES[env_ids].size()
# set a new random delay for environments in env_ids
time_lags = self.positions_delay_buffer.time_lags
time_lags[env_ids] = torch.randint(
low=self.cfg.min_num_time_lags,
high=self.cfg.max_num_time_lags + 1,
size=env_size,
device=self._device,
dtype=torch.int,
)
# set delays
self.positions_delay_buffer.set_time_lag(time_lags)
self.velocities_delay_buffer.set_time_lag(time_lags)
self.efforts_delay_buffer.set_time_lag(time_lags)
# reset buffers
self.positions_delay_buffer.reset(env_ids)
self.velocities_delay_buffer.reset(env_ids)
self.efforts_delay_buffer.reset(env_ids)
def compute(
self, control_action: ArticulationActions, joint_pos: torch.Tensor, joint_vel: torch.Tensor
) -> ArticulationActions:
# apply delay based on the delay the model for all the setpoints
control_action.joint_positions = self.positions_delay_buffer.compute(control_action.joint_positions)
control_action.joint_velocities = self.velocities_delay_buffer.compute(control_action.joint_velocities)
control_action.joint_efforts = self.efforts_delay_buffer.compute(control_action.joint_efforts)
# compte actuator model
return super().compute(control_action, joint_pos, joint_vel)
class RemotizedPDActuator(DelayedPDActuator):
"""Ideal PD actuator with angle dependent torque limits.
The torque limits for this actuator are applied by querying a lookup table describing the relationship between
the joint angle and the maximum output torque.
"""
def __init__(
self,
cfg: RemotizedPDActuatorCfg,
joint_names: list[str],
joint_ids: Sequence[int],
num_envs: int,
device: str,
stiffness: torch.Tensor | float = 0.0,
damping: torch.Tensor | float = 0.0,
armature: torch.Tensor | float = 0.0,
friction: torch.Tensor | float = 0.0,
effort_limit: torch.Tensor | float = torch.inf,
velocity_limit: torch.Tensor | float = torch.inf,
):
# remove effort and velocity box constraints from the base class
cfg.effort_limit = torch.inf
cfg.velocity_limit = torch.inf
# call the base method and set default effort_limit and velocity_limit to inf
super().__init__(
cfg, joint_names, joint_ids, num_envs, device, stiffness, damping, armature, friction, torch.inf, torch.inf
)
self._joint_parameter_lookup = cfg.joint_parameter_lookup.to(device=device)
# define remotized joint torque limit
self._torque_limit = LinearInterpolation(self.angle_samples, self.max_torque_samples, device=device)
@property
def angle_samples(self) -> torch.Tensor:
return self._joint_parameter_lookup[:, 0]
@property
def transmission_ratio_samples(self) -> torch.Tensor:
return self._joint_parameter_lookup[:, 1]
@property
def max_torque_samples(self) -> torch.Tensor:
return self._joint_parameter_lookup[:, 2]
def compute(
self, control_action: ArticulationActions, joint_pos: torch.Tensor, joint_vel: torch.Tensor
) -> ArticulationActions:
# call the base method
control_action = super().compute(control_action, joint_pos, joint_vel)
# compute the absolute torque limits for the current joint positions
abs_torque_limits = self._torque_limit.compute(joint_pos)
# apply the limits
control_action.joint_efforts = torch.clamp(
control_action.joint_efforts, min=-abs_torque_limits, max=abs_torque_limits
)
self.applied_effort = control_action.joint_efforts
return control_action
......@@ -6,7 +6,9 @@
"""Sub-package containing utilities for common operations and helper functions."""
from .array import *
from .buffers import *
from .configclass import configclass
from .dict import *
from .linear_interpolation import *
from .string import *
from .timer import Timer
# Copyright (c) 2022-2024, The Isaac Lab Project Developers.
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
"""Sub-module containing different buffers."""
from .circular_buffer import BatchedCircularBuffer
from .delay_buffer import DelayBuffer
# Copyright (c) 2022-2024, The Isaac Lab Project Developers.
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
from __future__ import annotations
import torch
from collections.abc import Sequence
class BatchedCircularBuffer:
"""Circular buffer for storing a history of batched tensor data."""
def __init__(self, max_len: int, batch_size: int, device: str):
"""Initialize the circular buffer.
Args:
max_len: The maximum length of the circular buffer. The minimum value is one.
batch_size: The batch dimension of the data.
device: Device used for processing.
"""
if max_len < 1:
raise ValueError(f"The buffer size should be greater than zero. However, it is set to {max_len}!")
self._max_len = max_len
self._batch_size = batch_size
self._device = device
self._ALL_INDICES = torch.arange(batch_size, device=device)
# number of data pushes passed since the last call to :meth:`reset`
self._num_pushes = torch.zeros(batch_size, dtype=torch.long, device=device)
# the pointer to the current head of the circular buffer (-1 means not initialized)
self._pointer: int = -1
# the circular buffer for data storage
self._buffer: torch.Tensor | None = None # the data buffer
def reset(self, batch_ids: Sequence[int] | None = None):
"""Reset the circular buffer.
Args:
batch_ids: Elements to reset in the batch dimension.
"""
# resolve all indices
if batch_ids is None:
batch_ids = self._ALL_INDICES
self._num_pushes[batch_ids] = 0
def append(self, data: torch.Tensor):
"""Append the data to the circular buffer.
Args:
data: The data to be appended, where `len(data) == self.batch_size`.
"""
if data.shape[0] != self.batch_size:
raise ValueError(f"The input data has {data.shape[0]} environments while expecting {self.batch_size}")
# at the fist call, initialize the buffer
if self._buffer is None:
self._pointer = -1
self._buffer = torch.empty((self.max_len, *data.shape), dtype=data.dtype, device=self._device)
# move the head to the next slot
self._pointer = (self._pointer + 1) % self.max_len
# add the new data to the last layer
self._buffer[self._pointer] = data
# increment number of number of pushes
self._num_pushes += 1
def __getitem__(self, key: torch.Tensor) -> torch.Tensor:
"""Get the data from the circular buffer in LIFO fashion.
Args:
key: The index of the data to be retrieved. It can be a single integer or a tensor of integers.
"""
if len(key) != self.batch_size:
raise ValueError(f"The key has length {key.shape[0]} while expecting {self.batch_size}")
if torch.any(self._num_pushes == 0) or self._buffer is None:
raise ValueError("Attempting to get data on an empty circular buffer.")
# admissible lag
valid_keys = torch.minimum(key, self._num_pushes - 1)
# the index in the circular buffer (pointer points to the last+1 index)
index_in_buffer = torch.remainder(self._pointer - valid_keys, self.max_len)
# return output
return self._buffer[index_in_buffer, self._ALL_INDICES, :]
"""
Properties.
"""
@property
def batch_size(self) -> int:
"""The batch size in the ring buffer."""
return self._batch_size
@property
def device(self) -> str:
"""Device used for processing."""
return self._device
@property
def max_len(self) -> int:
"""The maximum length of the ring buffer."""
return self._max_len
# Copyright (c) 2022-2024, The Isaac Lab Project Developers.
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
from __future__ import annotations
import torch
from collections.abc import Sequence
from .circular_buffer import BatchedCircularBuffer
class DelayBuffer:
"""Provides the functionality to simulate delays for a data stream.
The delay can be set constant, using :meth:`set_time_lag` with an integer input or different per environment, using
the same method with an integer tensor input. If the requested delay is larger than the number of buffered data
points since the last reset, the :meth:`compute` will return the oldest stored data, which is obviously less delayed
than expected. Internally, this class uses a circular buffer for better computation efficiency.
"""
def __init__(self, max_num_histories: int, num_envs: int, device: str):
"""Initialize the Delay Buffer.
By default all the environments will have no delay.
Args:
max_num_histories: The maximum number of time steps that the data will be buffered. It is recommended
to set this value equal to the maximum number of time lags that are expected. The minimum value is zero.
num_envs: Number of articulations in the view.
device: Device used for processing.
"""
self._max_num_histories = max(0, max_num_histories)
# the buffer size: current data plus the history length
self._circular_buffer = BatchedCircularBuffer(self._max_num_histories + 1, num_envs, device)
# the minimum and maximum lags across all environments.
self._min_time_lag = 0
self._max_time_lag = 0
# the lags for each environment.
self._time_lags = torch.zeros(num_envs, dtype=torch.int, device=device)
def set_time_lag(self, time_lag: int | torch.Tensor):
"""Sets the time lags for each environment.
Args:
time_lag: A single integer will result in a fixed delay across all environments, while a tensor of integers
with the size (num_envs, ) will set a different delay for each environment. This value cannot be larger than
:meth:`max_num_histories`.
"""
# parse requested time_lag
if isinstance(time_lag, int):
self._min_time_lag = time_lag
self._max_time_lag = time_lag
self._time_lags = torch.ones(self.num_envs, dtype=torch.int, device=self.device) * time_lag
elif isinstance(time_lag, torch.Tensor):
if time_lag.size() != torch.Size([
self.num_envs,
]):
raise TypeError(
f"Invalid size for time_lag: {time_lag.size()}. Expected torch.Size([{self.num_envs}])."
)
self._min_time_lag = torch.min(time_lag).item()
self._max_time_lag = torch.max(time_lag).item()
self._time_lags = time_lag.to(dtype=torch.int, device=self.device)
else:
raise TypeError(f"Invalid type for time_lag: {type(time_lag)}. Expected int or Tensor.")
# check that time_lag is feasible
if self._min_time_lag < 0:
raise ValueError("Minimum of `time_lag` cannot be negative!")
if self._max_time_lag > self._max_num_histories:
raise ValueError(f"Maximum of `time_lag` cannot be larger than {self._max_num_histories}!")
def reset(self, env_ids: Sequence[int] | None = None):
"""Reset the delay buffer.
Args:
env_ids: List of environment IDs to reset.
"""
self._circular_buffer.reset(env_ids)
def compute(self, data: torch.Tensor) -> torch.Tensor:
"""Adds the data to buffer and returns a stale version of the data based on :meth:`time_lags`.
If the requested delay is larger than the number of buffered data points since the last reset, the :meth:`compute`
will return the oldest stored data, which is obviously less delayed than expected.
Args:
data: The input data. Shape is ``(num_envs, num_feature)``.
Returns:
The delayed version of the input data. Shape is ``(num_envs, num_feature)``.
"""
# add the new data to the last layer
self._circular_buffer.append(data)
# return output
delayed_data = self._circular_buffer[self._time_lags]
return delayed_data.clone()
"""
Properties.
"""
@property
def num_envs(self) -> int:
"""Number of articulations in the view."""
return self._circular_buffer.batch_size
@property
def device(self) -> str:
"""Device used for processing."""
return self._circular_buffer.device
@property
def max_num_histories(self) -> int:
"""Maximum number of time steps that the data can buffered."""
return self._max_num_histories
@property
def min_time_lag(self) -> int:
"""Minimum number of time steps that the data can be delayed. This value cannot be negative."""
return self._min_time_lag
@property
def max_time_lag(self) -> int:
"""Maximum number of time steps that the data can be delayed. This value cannot be greater than :meth:`max_num_histories`."""
return self._max_time_lag
@property
def time_lags(self) -> torch.Tensor:
"""The time lag for each environment. These values are between :meth:`mim_time_lag` and :meth:`max_time_lag`."""
return self._time_lags
# Copyright (c) 2022-2024, The Isaac Lab Project Developers.
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
from __future__ import annotations
import torch
class LinearInterpolation:
"""Linearly interpolates a sampled scalar function ``y = f(x)`` where :math:`f: R -> R`.
It assumes that the function's domain, X, is sampled in an ascending order. For the query points out of
the sampling range of X, the class does a zero-order-hold extrapolation based on the boundary values.
"""
def __init__(self, x: torch.Tensor, y: torch.Tensor, device: str):
"""Initialize the Linear Interpolation.
The scalar function maps from real values, x, to real values, y.
Args:
x: An ascending vector of samples from the function's domain.
y: The function's values associated to x.
device: Device used for processing.
"""
# make sure that input tensors are 1D of size (num_samples,)
self._x = x.view(-1).clone().to(device=device)
self._y = y.view(-1).clone().to(device=device)
# make sure sizes are correct
if self._x.numel() == 0:
raise ValueError("Input tensor x is empty!")
if self._x.numel() != self._y.numel():
raise ValueError("Tensor x and y should have the same size!")
# make sure that x is sorted
if torch.any(self._x[1:] < self._x[:-1]):
raise ValueError("x is not sorted!")
def compute(self, q: torch.Tensor) -> torch.Tensor:
"""Calculates a linearly interpolated values for the query points.
Args:
q: The query points. It can have any arbitrary shape.
Returns:
The interpolation values. It has the same shape as the input tensor.
"""
# serialized q
q_1d = q.view(-1)
# Number of elements in the x that are strictly smaller than query points (use int32 instead of int64)
num_smaller_elements = torch.sum(self._x.unsqueeze(1) < q_1d.unsqueeze(0), dim=0, dtype=torch.int)
# The index pointing to the first element in x such that x[lower_bound_i] < q_i
# If a point is smaller that all x elements, it will assign 0
lower_bound = torch.clamp(num_smaller_elements - 1, min=0)
# The index pointing to the first element in x such that x[upper_bound_i] >= q_i
# If a point is greater than all x elements, it will assign the last elements' index
upper_bound = torch.clamp(num_smaller_elements, max=self._x.numel() - 1)
# compute the weight as: (q_i - x_lb) / (x_ub - x_lb)
weight = (q_1d - self._x[lower_bound]) / (self._x[upper_bound] - self._x[lower_bound])
# If a point is out of bounds assign weight 0.0
weight[upper_bound == lower_bound] = 0.0
# Perform linear interpolation
fq = self._y[lower_bound] + weight * (self._y[upper_bound] - self._y[lower_bound])
# deserialized fq
fq = fq.view(q.shape)
return fq
# Copyright (c) 2022-2024, The Isaac Lab Project Developers.
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
import torch
import unittest
"""Launch Isaac Sim Simulator first."""
from omni.isaac.lab.app import AppLauncher, run_tests
# launch omniverse app in headless mode
simulation_app = AppLauncher(headless=True).app
from omni.isaac.lab.utils import DelayBuffer
class TestDelayBuffer(unittest.TestCase):
"""Test fixture for checking Delay Buffer utilities in Orbit."""
device: str = "cpu"
num_envs: int = 10
max_num_histories: int = 4
def generate_data(self, length: int) -> torch.Tensor:
for step in range(length):
yield torch.ones((self.num_envs, 1), dtype=int, device=self.device) * step
def test_constant_time_lags(self):
"""Test constant delay."""
const_lag: int = 3
delay_buffer = DelayBuffer(self.max_num_histories, num_envs=self.num_envs, device=self.device)
delay_buffer.set_time_lag(const_lag)
all_data = []
for i, data in enumerate(self.generate_data(20)):
all_data.append(data)
# apply delay
delayed_data = delay_buffer.compute(data)
error = delayed_data - all_data[max(0, i - const_lag)]
self.assertTrue(torch.all(error == 0))
def test_reset(self):
"""Test resetting the last two environments after iteration `reset_itr`."""
const_lag: int = 2
reset_itr = 10
delay_buffer = DelayBuffer(self.max_num_histories, num_envs=self.num_envs, device=self.device)
delay_buffer.set_time_lag(const_lag)
all_data = []
for i, data in enumerate(self.generate_data(20)):
all_data.append(data)
# from 'reset_itr' iteration reset the last and second-to-last environments
if i == reset_itr:
delay_buffer.reset([-2, -1])
# apply delay
delayed_data = delay_buffer.compute(data)
# before 'reset_itr' is is similar to test_constant_time_lags
# after that indices [-2, -1] should be treated separately
if i < reset_itr:
error = delayed_data - all_data[max(0, i - const_lag)]
self.assertTrue(torch.all(error == 0))
else:
# error_regular = delayed_data[:-2] - all_data[max(0, i - const_lag)][:-2]
error2_reset = delayed_data[-2, -1] - all_data[max(reset_itr, i - const_lag)][-2, -1]
# self.assertTrue(torch.all(error_regular == 0))
self.assertTrue(torch.all(error2_reset == 0))
def test_random_time_lags(self):
"""Test random delay."""
max_lag: int = 3
time_lags = torch.randint(low=0, high=max_lag + 1, size=(self.num_envs,), dtype=torch.int, device=self.device)
delay_buffer = DelayBuffer(self.max_num_histories, num_envs=self.num_envs, device=self.device)
delay_buffer.set_time_lag(time_lags)
all_data = []
for i, data in enumerate(self.generate_data(20)):
all_data.append(data)
# apply delay
delayed_data = delay_buffer.compute(data)
true_delayed_index = torch.maximum(i - delay_buffer.time_lags, torch.zeros_like(delay_buffer.time_lags))
true_delayed_index = true_delayed_index.tolist()
for i in range(self.num_envs):
error = delayed_data[i] - all_data[true_delayed_index[i]][i]
self.assertTrue(torch.all(error == 0))
if __name__ == "__main__":
run_tests()
# Copyright (c) 2022-2024, The Isaac Lab Project Developers.
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
"""Configuration for the Boston Dynamics Spot robot."""
from __future__ import annotations
import torch
import omni.isaac.lab.sim as sim_utils
from omni.isaac.lab.actuators import DelayedPDActuatorCfg, RemotizedPDActuatorCfg
from omni.isaac.lab.assets.articulation import ArticulationCfg
from omni.isaac.lab.utils.assets import ISAAC_NUCLEUS_DIR
# Note: This data was collected by the Boston Dynamics AI Institute.
joint_parameter_lookup = torch.tensor([
[-2.792900, -24.776718, 37.165077],
[-2.767442, -26.290108, 39.435162],
[-2.741984, -27.793369, 41.690054],
[-2.716526, -29.285997, 43.928996],
[-2.691068, -30.767536, 46.151304],
[-2.665610, -32.237423, 48.356134],
[-2.640152, -33.695168, 50.542751],
[-2.614694, -35.140221, 52.710331],
[-2.589236, -36.572052, 54.858078],
[-2.563778, -37.990086, 56.985128],
[-2.538320, -39.393730, 59.090595],
[-2.512862, -40.782406, 61.173609],
[-2.487404, -42.155487, 63.233231],
[-2.461946, -43.512371, 65.268557],
[-2.436488, -44.852371, 67.278557],
[-2.411030, -46.174873, 69.262310],
[-2.385572, -47.479156, 71.218735],
[-2.360114, -48.764549, 73.146824],
[-2.334656, -50.030334, 75.045502],
[-2.309198, -51.275761, 76.913641],
[-2.283740, -52.500103, 78.750154],
[-2.258282, -53.702587, 80.553881],
[-2.232824, -54.882442, 82.323664],
[-2.207366, -56.038860, 84.058290],
[-2.181908, -57.171028, 85.756542],
[-2.156450, -58.278133, 87.417200],
[-2.130992, -59.359314, 89.038971],
[-2.105534, -60.413738, 90.620607],
[-2.080076, -61.440529, 92.160793],
[-2.054618, -62.438812, 93.658218],
[-2.029160, -63.407692, 95.111538],
[-2.003702, -64.346268, 96.519402],
[-1.978244, -65.253670, 97.880505],
[-1.952786, -66.128944, 99.193417],
[-1.927328, -66.971176, 100.456764],
[-1.901870, -67.779457, 101.669186],
[-1.876412, -68.552864, 102.829296],
[-1.850954, -69.290451, 103.935677],
[-1.825496, -69.991325, 104.986988],
[-1.800038, -70.654541, 105.981812],
[-1.774580, -71.279190, 106.918785],
[-1.749122, -71.864319, 107.796478],
[-1.723664, -72.409088, 108.613632],
[-1.698206, -72.912567, 109.368851],
[-1.672748, -73.373871, 110.060806],
[-1.647290, -73.792130, 110.688194],
[-1.621832, -74.166512, 111.249767],
[-1.596374, -74.496147, 111.744221],
[-1.570916, -74.780251, 112.170376],
[-1.545458, -75.017998, 112.526997],
[-1.520000, -75.208656, 112.812984],
[-1.494542, -75.351448, 113.027172],
[-1.469084, -75.445686, 113.168530],
[-1.443626, -75.490677, 113.236015],
[-1.418168, -75.485771, 113.228657],
[-1.392710, -75.430344, 113.145515],
[-1.367252, -75.323830, 112.985744],
[-1.341794, -75.165688, 112.748531],
[-1.316336, -74.955406, 112.433109],
[-1.290878, -74.692551, 112.038826],
[-1.265420, -74.376694, 111.565041],
[-1.239962, -74.007477, 111.011215],
[-1.214504, -73.584579, 110.376869],
[-1.189046, -73.107742, 109.661613],
[-1.163588, -72.576752, 108.865128],
[-1.138130, -71.991455, 107.987183],
[-1.112672, -71.351707, 107.027561],
[-1.087214, -70.657486, 105.986229],
[-1.061756, -69.908813, 104.863220],
[-1.036298, -69.105721, 103.658581],
[-1.010840, -68.248337, 102.372505],
[-0.985382, -67.336861, 101.005291],
[-0.959924, -66.371513, 99.557270],
[-0.934466, -65.352615, 98.028923],
[-0.909008, -64.280533, 96.420799],
[-0.883550, -63.155693, 94.733540],
[-0.858092, -61.978588, 92.967882],
[-0.832634, -60.749775, 91.124662],
[-0.807176, -59.469845, 89.204767],
[-0.781718, -58.139503, 87.209255],
[-0.756260, -56.759487, 85.139231],
[-0.730802, -55.330616, 82.995924],
[-0.705344, -53.853729, 80.780594],
[-0.679886, -52.329796, 78.494694],
[-0.654428, -50.759762, 76.139643],
[-0.628970, -49.144699, 73.717049],
[-0.603512, -47.485737, 71.228605],
[-0.578054, -45.784004, 68.676006],
[-0.552596, -44.040764, 66.061146],
[-0.527138, -42.257267, 63.385900],
[-0.501680, -40.434883, 60.652325],
[-0.476222, -38.574947, 57.862421],
[-0.450764, -36.678982, 55.018473],
[-0.425306, -34.748432, 52.122648],
[-0.399848, -32.784836, 49.177254],
[-0.374390, -30.789810, 46.184715],
[-0.348932, -28.764952, 43.147428],
[-0.323474, -26.711969, 40.067954],
[-0.298016, -24.632576, 36.948864],
[-0.272558, -22.528547, 33.792821],
[-0.247100, -20.401667, 30.602500],
])
"""Describes relationship between the joint angle (rad), the transmission ratio (in/out), and the output torque (N*m)
for the knees of the Boston Dynamics Spot robot.
"""
##
# Configuration
##
SPOT_CFG = ArticulationCfg(
spawn=sim_utils.UsdFileCfg(
usd_path=f"{ISAAC_NUCLEUS_DIR}/Robots/BostonDynamics/spot/spot.usd",
activate_contact_sensors=True,
rigid_props=sim_utils.RigidBodyPropertiesCfg(
disable_gravity=False,
retain_accelerations=False,
linear_damping=0.0,
angular_damping=0.0,
max_linear_velocity=1000.0,
max_angular_velocity=1000.0,
max_depenetration_velocity=1.0,
),
articulation_props=sim_utils.ArticulationRootPropertiesCfg(
enabled_self_collisions=True, solver_position_iteration_count=4, solver_velocity_iteration_count=0
),
),
init_state=ArticulationCfg.InitialStateCfg(
pos=(0.0, 0.0, 0.5),
joint_pos={
"[fh]l_hx": 0.1, # all left hip_x
"[fh]r_hx": -0.1, # all right hip_x
"f[rl]_hy": 0.9, # front hip_y
"h[rl]_hy": 1.1, # hind hip_y
".*_kn": -1.5, # all knees
},
joint_vel={".*": 0.0},
),
actuators={
"spot_hip": DelayedPDActuatorCfg(
joint_names_expr=[".*_h[xy]"],
effort_limit=45.0,
stiffness=60.0,
damping=1.5,
min_num_time_lags=0, # physics time steps (min: 2.0*0=0.0ms)
max_num_time_lags=4, # physics time steps (max: 2.0*4=8.0ms)
),
"spot_knee": RemotizedPDActuatorCfg(
joint_names_expr=[".*_kn"],
joint_parameter_lookup=joint_parameter_lookup,
effort_limit=None, # torque limits are handled based experimental data (:meth:`RemotizedPDActuatorCfg.data`)
stiffness=60.0,
damping=1.5,
min_num_time_lags=0, # physics time steps (min: 2.0*0=0.0ms)
max_num_time_lags=4, # physics time steps (max: 2.0*4=8.0ms)
),
},
)
"""Configuration for the Boston Dynamics Spot robot."""
We would like to acknowledge The AI Institute's efforts in developing the Spot MDP from specifications provided by Boston Dynamics.
They trained, verified, and deployed the resulting policy on the Spot hardware and demonstrated its capability and reliability out in the real world.
The accompanying deployment code and access to Spot's low-level API will be available in the Spot RL Researcher Kit.
We thank The AI Institute for their trailblazing use of and contributions to Isaac Lab and for sharing their code publicly with the community to promote wider use of the Nvidia RL ecosystem.
# Copyright (c) 2022-2024, The Isaac Lab Project Developers.
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
import gymnasium as gym
from . import agents, flat_env_cfg
##
# Register Gym environments.
##
gym.register(
id="Isaac-Velocity-Flat-Spot-v0",
entry_point="omni.isaac.lab.envs:ManagerBasedRLEnv",
disable_env_checker=True,
kwargs={
"env_cfg_entry_point": flat_env_cfg.SpotFlatEnvCfg,
"rsl_rl_cfg_entry_point": agents.rsl_rl_cfg.SpotFlatPPORunnerCfg,
},
)
gym.register(
id="Isaac-Velocity-Flat-Spot-Play-v0",
entry_point="omni.isaac.lab.envs:ManagerBasedRLEnv",
disable_env_checker=True,
kwargs={
"env_cfg_entry_point": flat_env_cfg.SpotFlatEnvCfg_PLAY,
"rsl_rl_cfg_entry_point": agents.rsl_rl_cfg.SpotFlatPPORunnerCfg,
},
)
# Copyright (c) 2022-2024, The Isaac Lab Project Developers.
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
from . import rsl_rl_cfg # noqa: F401, F403
# Copyright (c) 2022-2024, The Isaac Lab Project Developers.
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
from omni.isaac.lab.utils import configclass
from omni.isaac.lab_tasks.utils.wrappers.rsl_rl import (
RslRlOnPolicyRunnerCfg,
RslRlPpoActorCriticCfg,
RslRlPpoAlgorithmCfg,
)
@configclass
class SpotFlatPPORunnerCfg(RslRlOnPolicyRunnerCfg):
num_steps_per_env = 24
max_iterations = 35000
save_interval = 50
experiment_name = "spot_flat"
empirical_normalization = False
store_code_state = False
policy = RslRlPpoActorCriticCfg(
init_noise_std=1.0,
actor_hidden_dims=[512, 256, 128],
critic_hidden_dims=[512, 256, 128],
activation="elu",
)
algorithm = RslRlPpoAlgorithmCfg(
value_loss_coef=0.5,
use_clipped_value_loss=True,
clip_param=0.2,
entropy_coef=0.0025,
num_learning_epochs=5,
num_mini_batches=4,
learning_rate=1.0e-3,
schedule="adaptive",
gamma=0.99,
lam=0.95,
desired_kl=0.01,
max_grad_norm=1.0,
)
# Copyright (c) 2022-2024, The Isaac Lab Project Developers.
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
import omni.isaac.lab.sim as sim_utils
import omni.isaac.lab.terrains as terrain_gen
from omni.isaac.lab.envs import ViewerCfg
from omni.isaac.lab.managers import EventTermCfg as EventTerm
from omni.isaac.lab.managers import ObservationGroupCfg as ObsGroup
from omni.isaac.lab.managers import ObservationTermCfg as ObsTerm
from omni.isaac.lab.managers import RewardTermCfg, SceneEntityCfg
from omni.isaac.lab.managers import TerminationTermCfg as DoneTerm
from omni.isaac.lab.terrains import TerrainImporterCfg
from omni.isaac.lab.utils import configclass
from omni.isaac.lab.utils.assets import ISAACLAB_NUCLEUS_DIR
from omni.isaac.lab.utils.noise import AdditiveUniformNoiseCfg as Unoise
import omni.isaac.lab_tasks.manager_based.locomotion.velocity.config.spot.mdp as spot_mdp
import omni.isaac.lab_tasks.manager_based.locomotion.velocity.mdp as mdp
from omni.isaac.lab_tasks.manager_based.locomotion.velocity.velocity_env_cfg import LocomotionVelocityRoughEnvCfg
##
# Pre-defined configs
##
from omni.isaac.lab_assets.spot import SPOT_CFG # isort: skip
COBBLESTONE_ROAD_CFG = terrain_gen.TerrainGeneratorCfg(
size=(8.0, 8.0),
border_width=20.0,
num_rows=9,
num_cols=21,
horizontal_scale=0.1,
vertical_scale=0.005,
slope_threshold=0.75,
difficulty_range=(0.0, 1.0),
use_cache=False,
sub_terrains={
"flat": terrain_gen.MeshPlaneTerrainCfg(proportion=0.2),
"random_rough": terrain_gen.HfRandomUniformTerrainCfg(
proportion=0.2, noise_range=(0.02, 0.05), noise_step=0.02, border_width=0.25
),
},
)
@configclass
class SpotActionsCfg:
"""Action specifications for the MDP."""
joint_pos = mdp.JointPositionActionCfg(asset_name="robot", joint_names=[".*"], scale=0.2, use_default_offset=True)
@configclass
class SpotCommandsCfg:
"""Command specifications for the MDP."""
base_velocity = mdp.UniformVelocityCommandCfg(
asset_name="robot",
resampling_time_range=(10.0, 10.0),
rel_standing_envs=0.1,
rel_heading_envs=0.0,
heading_command=False,
debug_vis=True,
ranges=mdp.UniformVelocityCommandCfg.Ranges(
lin_vel_x=(-2.0, 4.0), lin_vel_y=(-1.5, 1.5), ang_vel_z=(-2.0, 2.0)
),
)
@configclass
class SpotObservationsCfg:
"""Observation specifications for the MDP."""
@configclass
class PolicyCfg(ObsGroup):
"""Observations for policy group."""
# `` observation terms (order preserved)
base_lin_vel = ObsTerm(
func=mdp.base_lin_vel, params={"asset_cfg": SceneEntityCfg("robot")}, noise=Unoise(n_min=-0.1, n_max=0.1)
)
base_ang_vel = ObsTerm(
func=mdp.base_ang_vel, params={"asset_cfg": SceneEntityCfg("robot")}, noise=Unoise(n_min=-0.1, n_max=0.1)
)
projected_gravity = ObsTerm(
func=mdp.projected_gravity,
params={"asset_cfg": SceneEntityCfg("robot")},
noise=Unoise(n_min=-0.05, n_max=0.05),
)
velocity_commands = ObsTerm(func=mdp.generated_commands, params={"command_name": "base_velocity"})
joint_pos = ObsTerm(
func=mdp.joint_pos_rel, params={"asset_cfg": SceneEntityCfg("robot")}, noise=Unoise(n_min=-0.05, n_max=0.05)
)
joint_vel = ObsTerm(
func=mdp.joint_vel_rel, params={"asset_cfg": SceneEntityCfg("robot")}, noise=Unoise(n_min=-0.5, n_max=0.5)
)
actions = ObsTerm(func=mdp.last_action)
def __post_init__(self):
self.enable_corruption = False
self.concatenate_terms = True
# observation groups
policy: PolicyCfg = PolicyCfg()
@configclass
class SpotEventCfg:
"""Configuration for randomization."""
# startup
physics_material = EventTerm(
func=mdp.randomize_rigid_body_material,
mode="startup",
params={
"asset_cfg": SceneEntityCfg("robot", body_names=".*"),
"static_friction_range": (0.3, 1.0),
"dynamic_friction_range": (0.3, 0.8),
"restitution_range": (0.0, 0.0),
"num_buckets": 64,
},
)
add_base_mass = EventTerm(
func=mdp.randomize_rigid_body_mass,
mode="startup",
params={
"asset_cfg": SceneEntityCfg("robot", body_names="body"),
"mass_distribution_params": (-2.5, 2.5),
"operation": "add",
},
)
# reset
base_external_force_torque = EventTerm(
func=mdp.apply_external_force_torque,
mode="reset",
params={
"asset_cfg": SceneEntityCfg("robot", body_names="body"),
"force_range": (0.0, 0.0),
"torque_range": (-0.0, 0.0),
},
)
reset_base = EventTerm(
func=mdp.reset_root_state_uniform,
mode="reset",
params={
"asset_cfg": SceneEntityCfg("robot"),
"pose_range": {"x": (-0.5, 0.5), "y": (-0.5, 0.5), "yaw": (-3.14, 3.14)},
"velocity_range": {
"x": (-1.5, 1.5),
"y": (-1.0, 1.0),
"z": (-0.5, 0.5),
"roll": (-0.7, 0.7),
"pitch": (-0.7, 0.7),
"yaw": (-1.0, 1.0),
},
},
)
reset_robot_joints = EventTerm(
func=spot_mdp.reset_joints_around_default,
mode="reset",
params={
"position_range": (-0.2, 0.2),
"velocity_range": (-2.5, 2.5),
"asset_cfg": SceneEntityCfg("robot"),
},
)
# interval
push_robot = EventTerm(
func=mdp.push_by_setting_velocity,
mode="interval",
interval_range_s=(10.0, 15.0),
params={
"asset_cfg": SceneEntityCfg("robot"),
"velocity_range": {"x": (-0.5, 0.5), "y": (-0.5, 0.5)},
},
)
@configclass
class SpotRewardsCfg:
# -- task
air_time = RewardTermCfg(
func=spot_mdp.air_time_reward,
weight=5.0,
params={"mode_time": 0.3, "sensor_cfg": SceneEntityCfg("contact_forces", body_names=".*_foot")},
)
base_angular_velocity = RewardTermCfg(
func=spot_mdp.base_angular_velocity_reward,
weight=5.0,
params={"std": 2.0, "asset_cfg": SceneEntityCfg("robot")},
)
base_linear_velocity = RewardTermCfg(
func=spot_mdp.base_linear_velocity_reward,
weight=5.0,
params={"std": 1.0, "ramp_rate": 0.5, "ramp_at_vel": 1.0, "asset_cfg": SceneEntityCfg("robot")},
)
foot_clearance = RewardTermCfg(
func=spot_mdp.foot_clearance_reward,
weight=0.5,
params={
"std": 0.05,
"tanh_mult": 2.0,
"target_height": 0.1,
"asset_cfg": SceneEntityCfg("robot", body_names=".*_foot"),
},
)
gait = RewardTermCfg(
func=spot_mdp.gait_reward,
weight=10.0,
params={"std": 0.1, "sensor_cfg": SceneEntityCfg("contact_forces", body_names=".*_foot")},
)
# -- penalties
action_smoothness = RewardTermCfg(func=spot_mdp.action_smoothness_penalty, weight=-1.0)
air_time_variance = RewardTermCfg(
func=spot_mdp.air_time_variance_penalty,
weight=-1.0,
params={"sensor_cfg": SceneEntityCfg("contact_forces", body_names=".*_foot")},
)
base_motion = RewardTermCfg(
func=spot_mdp.base_motion_penalty, weight=-2.0, params={"asset_cfg": SceneEntityCfg("robot")}
)
base_orientation = RewardTermCfg(
func=spot_mdp.base_orientation_penalty, weight=-3.0, params={"asset_cfg": SceneEntityCfg("robot")}
)
foot_slip = RewardTermCfg(
func=spot_mdp.foot_slip_penalty,
weight=-0.5,
params={
"asset_cfg": SceneEntityCfg("robot", body_names=".*_foot"),
"sensor_cfg": SceneEntityCfg("contact_forces", body_names=".*_foot"),
"threshold": 1.0,
},
)
joint_acc = RewardTermCfg(
func=spot_mdp.joint_acceleration_penalty,
weight=-1.0e-4,
params={"asset_cfg": SceneEntityCfg("robot", joint_names=".*_h[xy]")},
)
joint_pos = RewardTermCfg(
func=spot_mdp.joint_position_penalty,
weight=-0.7,
params={"asset_cfg": SceneEntityCfg("robot", joint_names=".*"), "stand_still_scale": 5.0},
)
joint_torques = RewardTermCfg(
func=spot_mdp.joint_torques_penalty,
weight=-5.0e-4,
params={"asset_cfg": SceneEntityCfg("robot", joint_names=".*")},
)
joint_vel = RewardTermCfg(
func=spot_mdp.joint_velocity_penalty,
weight=-1.0e-2,
params={"asset_cfg": SceneEntityCfg("robot", joint_names=".*_h[xy]")},
)
@configclass
class SpotTerminationsCfg:
"""Termination terms for the MDP."""
time_out = DoneTerm(func=mdp.time_out, time_out=True)
body_contact = DoneTerm(
func=mdp.illegal_contact,
params={"sensor_cfg": SceneEntityCfg("contact_forces", body_names=["body", ".*leg"]), "threshold": 1.0},
)
out_of_bounds = DoneTerm(
func=spot_mdp.terminations.terrain_out_of_bounds,
params={"asset_cfg": SceneEntityCfg("robot"), "distance_buffer": 3.0},
time_out=True,
)
@configclass
class SpotCurriculumCfg:
"""Curriculum terms for the MDP."""
pass
@configclass
class SpotFlatEnvCfg(LocomotionVelocityRoughEnvCfg):
# Basic settings'
observations: SpotObservationsCfg = SpotObservationsCfg()
actions: SpotActionsCfg = SpotActionsCfg()
commands: SpotCommandsCfg = SpotCommandsCfg()
# MDP setting
rewards: SpotRewardsCfg = SpotRewardsCfg()
terminations: SpotTerminationsCfg = SpotTerminationsCfg()
events: SpotEventCfg = SpotEventCfg()
curriculum: SpotCurriculumCfg = SpotCurriculumCfg()
# Viewer
viewer = ViewerCfg(eye=(10.5, 10.5, 0.3), origin_type="world", env_index=0, asset_name="robot")
def __post_init__(self):
# post init of parent
super().__post_init__()
# general settings
self.decimation = 10 # 50 Hz
self.episode_length_s = 20.0
# simulation settings
self.sim.dt = 0.002 # 500 Hz
self.sim.substeps = 1
self.sim.disable_contact_processing = True
self.sim.physics_material.static_friction = 1.0
self.sim.physics_material.dynamic_friction = 1.0
self.sim.physics_material.friction_combine_mode = "multiply"
self.sim.physics_material.restitution_combine_mode = "multiply"
# update sensor update periods
# we tick all the sensors based on the smallest update period (physics update period)
self.scene.contact_forces.update_period = self.sim.dt
# switch robot to Spot-d
self.scene.robot = SPOT_CFG.replace(prim_path="{ENV_REGEX_NS}/Robot")
# terrain
self.scene.terrain = TerrainImporterCfg(
prim_path="/World/ground",
terrain_type="generator",
terrain_generator=COBBLESTONE_ROAD_CFG,
max_init_terrain_level=COBBLESTONE_ROAD_CFG.num_rows - 1,
collision_group=-1,
physics_material=sim_utils.RigidBodyMaterialCfg(
friction_combine_mode="multiply",
restitution_combine_mode="multiply",
static_friction=1.0,
dynamic_friction=1.0,
),
visual_material=sim_utils.MdlFileCfg(
mdl_path=f"{ISAACLAB_NUCLEUS_DIR}/Materials/TilesMarbleSpiderWhiteBrickBondHoned/TilesMarbleSpiderWhiteBrickBondHoned.mdl",
project_uvw=True,
texture_scale=(0.25, 0.25),
),
debug_vis=True,
)
# no height scan
self.scene.height_scanner = None
class SpotFlatEnvCfg_PLAY(SpotFlatEnvCfg):
def __post_init__(self) -> None:
# post init of parent
super().__post_init__()
# make a smaller scene for play
self.scene.num_envs = 50
self.scene.env_spacing = 2.5
# spawn the robot randomly in the grid (instead of their terrain levels)
self.scene.terrain.max_init_terrain_level = None
# reduce the number of terrains to save memory
if self.scene.terrain.terrain_generator is not None:
self.scene.terrain.terrain_generator.num_rows = 5
self.scene.terrain.terrain_generator.num_cols = 5
self.scene.terrain.terrain_generator.curriculum = False
# disable randomization for play
self.observations.policy.enable_corruption = False
# remove random pushing event
# self.events.base_external_force_torque = None
# self.events.push_robot = None
# Copyright (c) 2022-2024, The Isaac Lab Project Developers.
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
"""This sub-module contains the functions that are specific to the Spot locomotion task."""
from .events import * # noqa: F401, F403
from .rewards import * # noqa: F401, F403
from .terminations import * # noqa: F401, F403
# Copyright (c) 2022-2024, The Isaac Lab Project Developers.
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
"""This sub-module contains the functions that can be used to enable Spot randomizations.
The functions can be passed to the :class:`omni.isaac.lab.managers.EventTermCfg` object to enable
the randomization introduced by the function.
"""
from __future__ import annotations
import torch
from typing import TYPE_CHECKING
from omni.isaac.lab.assets import Articulation
from omni.isaac.lab.managers import SceneEntityCfg
from omni.isaac.lab.utils.math import sample_uniform
if TYPE_CHECKING:
from omni.isaac.lab.envs import ManagerBasedEnv
def reset_joints_around_default(
env: ManagerBasedEnv,
env_ids: torch.Tensor,
position_range: tuple[float, float],
velocity_range: tuple[float, float],
asset_cfg: SceneEntityCfg = SceneEntityCfg("robot"),
):
"""Reset the robot joints in the interval around the default position and velocity by the given ranges.
This function samples random values from the given ranges around the default joint positions and velocities.
The ranges are clipped to fit inside the soft joint limits. The sampled values are then set into the physics
simulation.
"""
# extract the used quantities (to enable type-hinting)
asset: Articulation = env.scene[asset_cfg.name]
# get default joint state
joint_min_pos = asset.data.default_joint_pos[env_ids] + position_range[0]
joint_max_pos = asset.data.default_joint_pos[env_ids] + position_range[1]
joint_min_vel = asset.data.default_joint_vel[env_ids] + velocity_range[0]
joint_max_vel = asset.data.default_joint_vel[env_ids] + velocity_range[1]
# clip pos to range
joint_pos_limits = asset.data.soft_joint_pos_limits[env_ids, ...]
joint_min_pos = torch.clamp(joint_min_pos, min=joint_pos_limits[..., 0], max=joint_pos_limits[..., 1])
joint_max_pos = torch.clamp(joint_max_pos, min=joint_pos_limits[..., 0], max=joint_pos_limits[..., 1])
# clip vel to range
joint_vel_abs_limits = asset.data.soft_joint_vel_limits[env_ids]
joint_min_vel = torch.clamp(joint_min_vel, min=-joint_vel_abs_limits, max=joint_vel_abs_limits)
joint_max_vel = torch.clamp(joint_max_vel, min=-joint_vel_abs_limits, max=joint_vel_abs_limits)
# sample these values randomly
joint_pos = sample_uniform(joint_min_pos, joint_max_pos, joint_min_pos.shape, joint_min_pos.device)
joint_vel = sample_uniform(joint_min_vel, joint_max_vel, joint_min_vel.shape, joint_min_vel.device)
# set into the physics simulation
asset.write_joint_state_to_sim(joint_pos, joint_vel, env_ids=env_ids)
# Copyright (c) 2022-2024, The Isaac Lab Project Developers.
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
from __future__ import annotations
import torch
from typing import TYPE_CHECKING
from omni.isaac.lab.assets import Articulation, RigidObject
from omni.isaac.lab.managers import SceneEntityCfg
from omni.isaac.lab.sensors import ContactSensor
if TYPE_CHECKING:
from omni.isaac.lab.envs import ManagerBasedRLEnv
# -- Task Rewards
def air_time_reward(env: ManagerBasedRLEnv, sensor_cfg: SceneEntityCfg, mode_time: float) -> torch.Tensor:
"""Reward longer feet air and contact time"""
# extract the used quantities (to enable type-hinting)
contact_sensor: ContactSensor = env.scene.sensors[sensor_cfg.name]
if contact_sensor.cfg.track_air_time is False:
raise RuntimeError("Activate ContactSensor's track_air_time!")
# compute the reward
current_air_time = contact_sensor.data.current_air_time[:, sensor_cfg.body_ids]
current_contact_time = contact_sensor.data.current_contact_time[:, sensor_cfg.body_ids]
t_max = torch.max(current_air_time, current_contact_time)
t_min = torch.clip(t_max, max=mode_time)
stance_cmd_reward = torch.clip(current_contact_time - current_air_time, -mode_time, mode_time)
cmd = torch.norm(env.command_manager.get_command("base_velocity"), dim=1).unsqueeze(dim=1).expand(-1, 4)
reward = torch.where(cmd > 0.0, torch.where(t_max < mode_time, t_min, 0), stance_cmd_reward)
return torch.sum(reward, dim=1)
def base_angular_velocity_reward(env: ManagerBasedRLEnv, asset_cfg: SceneEntityCfg, std: float) -> torch.Tensor:
"""Reward tracking of angular velocity commands (yaw) using abs exponential kernel."""
# extract the used quantities (to enable type-hinting)
asset: RigidObject = env.scene[asset_cfg.name]
# compute the error
target = env.command_manager.get_command("base_velocity")[:, 2]
ang_vel_error = torch.linalg.norm((target - asset.data.root_ang_vel_b[:, 2]).unsqueeze(1), dim=1)
return torch.exp(-ang_vel_error / std)
def base_linear_velocity_reward(
env: ManagerBasedRLEnv, asset_cfg: SceneEntityCfg, std: float, ramp_at_vel: float = 1.0, ramp_rate: float = 0.5
) -> torch.Tensor:
"""Reward tracking of linear velocity commands (xy axes) using abs exponential kernel."""
# extract the used quantities (to enable type-hinting)
asset: RigidObject = env.scene[asset_cfg.name]
# compute the error
target = env.command_manager.get_command("base_velocity")[:, :2]
lin_vel_error = torch.linalg.norm((target - asset.data.root_lin_vel_b[:, :2]), dim=1)
# fixed 1.0 multiple for tracking below the ramp_at_vel value, then scale by the rate above
vel_cmd_magnitude = torch.linalg.norm(target, dim=1)
velocity_scaling_multiple = torch.clamp(1.0 + ramp_rate * (vel_cmd_magnitude - ramp_at_vel), min=1.0)
return torch.exp(-lin_vel_error / std) * velocity_scaling_multiple
# ! need to finalize logic, params, and docstring
def gait_reward(env: ManagerBasedRLEnv, sensor_cfg: SceneEntityCfg, std: float) -> torch.Tensor:
"""Penalize ..."""
# extract the used quantities (to enable type-hinting)
contact_sensor: ContactSensor = env.scene.sensors[sensor_cfg.name]
if contact_sensor.cfg.track_air_time is False:
raise RuntimeError("Activate ContactSensor's track_air_time!")
# compute the reward
air_time = contact_sensor.data.current_air_time[:, sensor_cfg.body_ids]
contact_time = contact_sensor.data.current_contact_time[:, sensor_cfg.body_ids]
max_err = 0.2
indices_0 = [0, 1]
indices_1 = [2, 3]
cmd = torch.norm(env.command_manager.get_command("base_velocity"), dim=1)
asym_err_0 = torch.clip(
torch.square(air_time[:, indices_0[0]] - contact_time[:, indices_0[1]]), max=max_err**2
) + torch.clip(torch.square(contact_time[:, indices_0[0]] - air_time[:, indices_0[1]]), max=max_err**2)
asym_err_1 = torch.clip(
torch.square(air_time[:, indices_1[0]] - contact_time[:, indices_1[1]]), max=max_err**2
) + torch.clip(torch.square(contact_time[:, indices_1[0]] - air_time[:, indices_1[1]]), max=max_err**2)
asym_err_2 = torch.clip(
torch.square(air_time[:, indices_0[0]] - contact_time[:, indices_1[0]]), max=max_err**2
) + torch.clip(torch.square(contact_time[:, indices_0[0]] - air_time[:, indices_1[0]]), max=max_err**2)
asym_err_3 = torch.clip(
torch.square(air_time[:, indices_0[1]] - contact_time[:, indices_1[1]]), max=max_err**2
) + torch.clip(torch.square(contact_time[:, indices_0[1]] - air_time[:, indices_1[1]]), max=max_err**2)
sym_err_0 = torch.clip(
torch.square(air_time[:, indices_0[0]] - air_time[:, indices_1[1]]), max=max_err**2
) + torch.clip(torch.square(contact_time[:, indices_0[0]] - contact_time[:, indices_1[1]]), max=max_err**2)
sym_err_1 = torch.clip(
torch.square(air_time[:, indices_0[1]] - air_time[:, indices_1[0]]), max=max_err**2
) + torch.clip(torch.square(contact_time[:, indices_0[1]] - contact_time[:, indices_1[0]]), max=max_err**2)
gait_err = asym_err_0 + asym_err_1 + sym_err_0 + sym_err_1 + asym_err_2 + asym_err_3
return torch.where(cmd > 0.0, torch.exp(-gait_err / std), 0.0)
def foot_clearance_reward(
env: ManagerBasedRLEnv, asset_cfg: SceneEntityCfg, target_height: float, std: float, tanh_mult: float
) -> torch.Tensor:
"""Reward the swinging feet for clearing a specified height off the ground"""
asset: RigidObject = env.scene[asset_cfg.name]
foot_z_target_error = torch.square(asset.data.body_pos_w[:, asset_cfg.body_ids, 2] - target_height)
foot_velocity_tanh = torch.tanh(tanh_mult * torch.norm(asset.data.body_lin_vel_w[:, asset_cfg.body_ids, :2], dim=2))
reward = foot_z_target_error * foot_velocity_tanh
return torch.exp(-torch.sum(reward, dim=1) / std)
# -- Regularization Penalties
def action_smoothness_penalty(env: ManagerBasedRLEnv) -> torch.Tensor:
"""Penalize large instantaneous changes in the network action output"""
return torch.linalg.norm((env.action_manager.action - env.action_manager.prev_action), dim=1)
def air_time_variance_penalty(env: ManagerBasedRLEnv, sensor_cfg: SceneEntityCfg) -> torch.Tensor:
"""Penalize variance in the amount of time each foot spends in the air/on the ground relative to each other"""
# extract the used quantities (to enable type-hinting)
contact_sensor: ContactSensor = env.scene.sensors[sensor_cfg.name]
if contact_sensor.cfg.track_air_time is False:
raise RuntimeError("Activate ContactSensor's track_air_time!")
# compute the reward
last_air_time = contact_sensor.data.last_air_time[:, sensor_cfg.body_ids]
last_contact_time = contact_sensor.data.last_contact_time[:, sensor_cfg.body_ids]
return torch.var(torch.clip(last_air_time, max=0.5), dim=1) + torch.var(
torch.clip(last_contact_time, max=0.5), dim=1
)
# ! look into simplifying the kernel here; it's a little oddly complex
def base_motion_penalty(env: ManagerBasedRLEnv, asset_cfg: SceneEntityCfg) -> torch.Tensor:
"""Penalize base vertical and roll/pitch velocity"""
# extract the used quantities (to enable type-hinting)
asset: RigidObject = env.scene[asset_cfg.name]
return 0.8 * torch.square(asset.data.root_lin_vel_b[:, 2]) + 0.2 * torch.sum(
torch.abs(asset.data.root_ang_vel_b[:, :2]), dim=1
)
def base_orientation_penalty(env: ManagerBasedRLEnv, asset_cfg: SceneEntityCfg) -> torch.Tensor:
"""Penalize non-flat base orientation
This is computed by penalizing the xy-components of the projected gravity vector.
"""
# extract the used quantities (to enable type-hinting)
asset: RigidObject = env.scene[asset_cfg.name]
return torch.linalg.norm((asset.data.projected_gravity_b[:, :2]), dim=1)
def foot_slip_penalty(
env: ManagerBasedRLEnv, asset_cfg: SceneEntityCfg, sensor_cfg: SceneEntityCfg, threshold: float
) -> torch.Tensor:
"""Penalize foot planar (xy) slip when in contact with the ground"""
asset: RigidObject = env.scene[asset_cfg.name]
# extract the used quantities (to enable type-hinting)
contact_sensor: ContactSensor = env.scene.sensors[sensor_cfg.name]
# check if contact force is above threshold
net_contact_forces = contact_sensor.data.net_forces_w_history
is_contact = torch.max(torch.norm(net_contact_forces[:, :, sensor_cfg.body_ids], dim=-1), dim=1)[0] > threshold
foot_planar_velocity = torch.linalg.norm(asset.data.body_lin_vel_w[:, asset_cfg.body_ids, :2], dim=2)
reward = is_contact * foot_planar_velocity
return torch.sum(reward, dim=1)
def joint_acceleration_penalty(env: ManagerBasedRLEnv, asset_cfg: SceneEntityCfg) -> torch.Tensor:
"""Penalize joint accelerations on the articulation."""
# extract the used quantities (to enable type-hinting)
asset: Articulation = env.scene[asset_cfg.name]
return torch.linalg.norm((asset.data.joint_acc), dim=1)
def joint_position_penalty(env: ManagerBasedRLEnv, asset_cfg: SceneEntityCfg, stand_still_scale: float) -> torch.Tensor:
"""Penalize joint position error from default on the articulation."""
# extract the used quantities (to enable type-hinting)
asset: Articulation = env.scene[asset_cfg.name]
cmd = torch.linalg.norm(env.command_manager.get_command("base_velocity"), dim=1)
body_vel = torch.linalg.norm(asset.data.root_lin_vel_b[:, :2], dim=1)
reward = torch.linalg.norm((asset.data.joint_pos - asset.data.default_joint_pos), dim=1)
return torch.where(torch.logical_or(cmd > 0.0, body_vel > 0.5), reward, stand_still_scale * reward)
def joint_torques_penalty(env: ManagerBasedRLEnv, asset_cfg: SceneEntityCfg) -> torch.Tensor:
"""Penalize joint torques on the articulation."""
# extract the used quantities (to enable type-hinting)
asset: Articulation = env.scene[asset_cfg.name]
return torch.linalg.norm((asset.data.applied_torque), dim=1)
def joint_velocity_penalty(env: ManagerBasedRLEnv, asset_cfg: SceneEntityCfg) -> torch.Tensor:
"""Penalize joint velocities on the articulation."""
# extract the used quantities (to enable type-hinting)
asset: Articulation = env.scene[asset_cfg.name]
return torch.linalg.norm((asset.data.joint_vel), dim=1)
# Copyright (c) 2022-2024, The Isaac Lab Project Developers.
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
"""Common functions that can be used to activate certain terminations.
The functions can be passed to the :class:`omni.isaac.lab.managers.TerminationTermCfg` object to enable
the termination introduced by the function.
"""
from __future__ import annotations
import torch
from typing import TYPE_CHECKING
from omni.isaac.lab.assets import RigidObject
from omni.isaac.lab.managers import SceneEntityCfg
if TYPE_CHECKING:
from omni.isaac.lab.envs import ManagerBasedRLEnv
"""
Terrain size limits.
"""
def terrain_out_of_bounds(
env: ManagerBasedRLEnv, asset_cfg: SceneEntityCfg = SceneEntityCfg("robot"), distance_buffer: float = 3.0
) -> torch.Tensor:
"""Terminate when agents move too close to the edge of the terrain."""
# extract the used quantities (to enable type-hinting)
asset: RigidObject = env.scene[asset_cfg.name]
def get_map_size(env: ManagerBasedRLEnv) -> tuple[float, float]:
grid_width, grid_length = env.scene.terrain.cfg.terrain_generator.size
n_cols = env.scene.terrain.cfg.terrain_generator.num_cols
n_rows = env.scene.terrain.cfg.terrain_generator.num_rows
border_width = env.scene.terrain.cfg.terrain_generator.border_width
length = n_cols * grid_length + 2 * border_width
width = n_rows * grid_width + 2 * border_width
return (width, length)
if env.scene.cfg.terrain.terrain_type == "plane":
return False
elif env.scene.cfg.terrain.terrain_type == "generator":
map_width, map_height = get_map_size(env)
x_out_of_bounds = torch.abs(asset.data.root_pos_w[:, 0]) > 0.5 * map_width - distance_buffer
y_out_of_bounds = torch.abs(asset.data.root_pos_w[:, 1]) > 0.5 * map_height - distance_buffer
return torch.logical_or(x_out_of_bounds, y_out_of_bounds)
else:
raise ValueError("Received unsupported terrain type, must be either 'plane' or 'generator'")
......@@ -76,7 +76,7 @@ class MySceneCfg(InteractiveSceneCfg):
sky_light = AssetBaseCfg(
prim_path="/World/skyLight",
spawn=sim_utils.DomeLightCfg(
intensity=900.0,
intensity=750.0,
texture_file=f"{ISAAC_NUCLEUS_DIR}/Materials/Textures/Skies/PolyHaven/kloofendal_43d_clear_puresky_4k.hdr",
),
)
......
......@@ -44,6 +44,7 @@ from omni.isaac.lab.assets import Articulation
# Pre-defined configs
##
from omni.isaac.lab_assets.anymal import ANYMAL_B_CFG, ANYMAL_C_CFG, ANYMAL_D_CFG # isort:skip
from omni.isaac.lab_assets.spot import SPOT_CFG # isort:skip
from omni.isaac.lab_assets.unitree import UNITREE_A1_CFG, UNITREE_GO1_CFG, UNITREE_GO2_CFG # isort:skip
......@@ -73,7 +74,7 @@ def design_scene() -> tuple[dict, list[list[float]]]:
# Create separate groups called "Origin1", "Origin2", "Origin3"
# Each group will have a mount and a robot on top of it
origins = define_origins(num_origins=6, spacing=1.25)
origins = define_origins(num_origins=7, spacing=1.25)
# Origin 1 with Anymal B
prim_utils.create_prim("/World/Origin1", "Xform", translation=origins[0])
......@@ -105,6 +106,11 @@ def design_scene() -> tuple[dict, list[list[float]]]:
# -- Robot
unitree_go2 = Articulation(UNITREE_GO2_CFG.replace(prim_path="/World/Origin6/Robot"))
# Origin 7 with Boston Dynamics Spot
prim_utils.create_prim("/World/Origin7", "Xform", translation=origins[5])
# -- Robot
spot = Articulation(SPOT_CFG.replace(prim_path="/World/Origin7/Robot"))
# return the scene information
scene_entities = {
"anymal_b": anymal_b,
......@@ -113,6 +119,7 @@ def design_scene() -> tuple[dict, list[list[float]]]:
"unitree_a1": unitree_a1,
"unitree_go1": unitree_go1,
"unitree_go2": unitree_go2,
"spot": spot,
}
return scene_entities, origins
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment