Commit 8a58a23c authored by peterd-NV's avatar peterd-NV Committed by Kelly Guo

Updates Mimic APIs/configs/docs for future dexmimic compatibility (#216)

# Description

Doc and config changes from @karsten-nvidia:
- Add additional details on custom environments to mimic docs.
- Update comments in mimic configuration to make it easier telling apart
what's important.
- Some minor cleanups in existing docs.
- Add "common pitfalls" section to docs to guide users how to get
successful data generation/training

Mimic API and config changes to support forward dexmimic compatibility:
- Use dictionaries of subtasks in mimic env config; keys are eef_names
- Mimic Env APIs now use dictionary of eef_names to enable mulit-eef
support in future
- Data generation code updated accordingly to use new Mimic env APIs

## Type of change

<!-- As you go through the list, delete the ones that are not
applicable. -->

- Bug fix (non-breaking change which fixes an issue)

## Screenshots

Please attach before and after screenshots of the change if applicable.

<!--
Example:

| Before | After |
| ------ | ----- |
| _gif/png before_ | _gif/png after_ |

To upload images to a PR -- simply drag and drop an image while in edit
mode and it should upload the image directly. You can then paste that
source into the above before/after sections.
-->

## Checklist

- [x] I have run the [`pre-commit` checks](https://pre-commit.com/) with
`./isaaclab.sh --format`
- [x] I have made corresponding changes to the documentation
- [x] My changes generate no new warnings
- [ ] I have added tests that prove my fix is effective or that my
feature works
- [ ] I have updated the changelog and the corresponding version in the
extension's `config/extension.toml` file
- [ ] I have added my name to the `CONTRIBUTORS.md` or my name already
exists there

<!--
As you go through the checklist above, you can mark something as done by
putting an x character in it

For example,
- [x] I have done this task
- [ ] I have not done this task
-->

---------
Signed-off-by: 's avatarpeterd-NV <peterd@nvidia.com>
Signed-off-by: 's avatarKelly Guo <kellyg@nvidia.com>
Signed-off-by: 's avatarKelly Guo <kellyguo123@hotmail.com>
Signed-off-by: 's avatarAshwin Varghese Kuruttukulam <123109010+ashwinvkNV@users.noreply.github.com>
Co-authored-by: 's avatarCY Chen <cyc@nvidia.com>
Co-authored-by: 's avataroahmednv <oahmed@Nvidia.com>
Co-authored-by: 's avatarToni-SM <aserranomuno@nvidia.com>
Co-authored-by: 's avatarKelly Guo <kellyg@nvidia.com>
Co-authored-by: 's avatarKelly Guo <kellyguo123@hotmail.com>
Co-authored-by: 's avatarrwiltz <165190220+rwiltz@users.noreply.github.com>
Co-authored-by: 's avatarnv-cupright <92540563+nv-cupright@users.noreply.github.com>
Co-authored-by: 's avatarAlexander Poddubny <143108850+nv-apoddubny@users.noreply.github.com>
Co-authored-by: 's avatarchengronglai <chengrongl@nvidia.com>
Co-authored-by: 's avatarDavid Hoeller <dhoeller@nvidia.com>
Co-authored-by: 's avatarmatthewtrepte <mtrepte@nvidia.com>
Co-authored-by: 's avatarAshwin Varghese Kuruttukulam <123109010+ashwinvkNV@users.noreply.github.com>
Co-authored-by: 's avatarKarsten Patzwaldt <kpatzwaldt@nvidia.com>
parent 31f4e9cd
...@@ -39,13 +39,6 @@ Datagen Info Pool ...@@ -39,13 +39,6 @@ Datagen Info Pool
:members: :members:
:inherited-members: :inherited-members:
Selection Strategy
------------------
.. autoclass:: SelectionStrategy
:members:
:inherited-members:
Random Strategy Random Strategy
--------------- ---------------
......
This diff is collapsed.
...@@ -55,6 +55,7 @@ import isaaclab_mimic.envs # noqa: F401 ...@@ -55,6 +55,7 @@ import isaaclab_mimic.envs # noqa: F401
# Only enables inputs if this script is NOT headless mode # Only enables inputs if this script is NOT headless mode
if not args_cli.headless and not os.environ.get("HEADLESS", 0): if not args_cli.headless and not os.environ.get("HEADLESS", 0):
from isaaclab.devices import Se3Keyboard from isaaclab.devices import Se3Keyboard
from isaaclab.envs import ManagerBasedRLMimicEnv
from isaaclab.envs.mdp.recorders.recorders_cfg import ActionStateRecorderManagerCfg from isaaclab.envs.mdp.recorders.recorders_cfg import ActionStateRecorderManagerCfg
from isaaclab.managers import RecorderTerm, RecorderTermCfg from isaaclab.managers import RecorderTerm, RecorderTermCfg
from isaaclab.utils import configclass from isaaclab.utils import configclass
...@@ -88,11 +89,16 @@ class PreStepDatagenInfoRecorder(RecorderTerm): ...@@ -88,11 +89,16 @@ class PreStepDatagenInfoRecorder(RecorderTerm):
"""Recorder term that records the datagen info data in each step.""" """Recorder term that records the datagen info data in each step."""
def record_pre_step(self): def record_pre_step(self):
eef_pose_dict = {}
for eef_name in self._env.cfg.subtask_configs.keys():
eef_pose_dict[eef_name] = self._env.get_robot_eef_pose(eef_name)
datagen_info = { datagen_info = {
"object_pose": self._env.scene.get_state(is_relative=True)["rigid_object"], "object_pose": self._env.get_object_poses(),
"target_eef_pose": self._env.action_to_target_eef_pos(self._env.action_manager.action), "eef_pose": eef_pose_dict,
"target_eef_pose": self._env.action_to_target_eef_pose(self._env.action_manager.action),
} }
return "obs", datagen_info return "obs/datagen_info", datagen_info
@configclass @configclass
...@@ -106,7 +112,7 @@ class PreStepSubtaskTermsObservationsRecorder(RecorderTerm): ...@@ -106,7 +112,7 @@ class PreStepSubtaskTermsObservationsRecorder(RecorderTerm):
"""Recorder term that records the subtask completion observations in each step.""" """Recorder term that records the subtask completion observations in each step."""
def record_pre_step(self): def record_pre_step(self):
return "obs/subtask_term_signals", self._env.obs_buf["subtask_terms"] return "obs/datagen_info/subtask_term_signals", self._env.get_subtask_term_signals()
@configclass @configclass
...@@ -164,12 +170,26 @@ def main(): ...@@ -164,12 +170,26 @@ def main():
# Set up recorder terms for mimic annotations # Set up recorder terms for mimic annotations
env_cfg.env_name = args_cli.task env_cfg.env_name = args_cli.task
env_cfg.recorders: MimicRecorderManagerCfg = MimicRecorderManagerCfg() env_cfg.recorders: MimicRecorderManagerCfg = MimicRecorderManagerCfg()
if not args_cli.auto:
# disable subtask term signals recorder term if in manual mode
env_cfg.recorders.record_pre_step_subtask_term_signals = None
env_cfg.recorders.dataset_export_dir_path = output_dir env_cfg.recorders.dataset_export_dir_path = output_dir
env_cfg.recorders.dataset_filename = output_file_name env_cfg.recorders.dataset_filename = output_file_name
# create environment from loaded config # create environment from loaded config
env = gym.make(args_cli.task, cfg=env_cfg) env = gym.make(args_cli.task, cfg=env_cfg)
if not isinstance(env.unwrapped, ManagerBasedRLMimicEnv):
raise ValueError("The environment should be derived from ManagerBasedRLMimicEnv")
if args_cli.auto:
# check if the mimic API env.unwrapped.get_subtask_term_signals() is implemented
if env.unwrapped.get_subtask_term_signals.__func__ is ManagerBasedRLMimicEnv.get_subtask_term_signals:
raise NotImplementedError(
"The environment does not implement the get_subtask_term_signals method required "
"to run automatic annotations."
)
# reset environment # reset environment
env.reset() env.reset()
...@@ -219,13 +239,12 @@ def main(): ...@@ -219,13 +239,12 @@ def main():
f" to number of subtasks {len(args_cli.signals)}" f" to number of subtasks {len(args_cli.signals)}"
) )
annotated_episode = env.unwrapped.recorder_manager.get_episode(0) annotated_episode = env.unwrapped.recorder_manager.get_episode(0)
del annotated_episode.data["obs"]["subtask_term_signals"]
for subtask_index in range(len(args_cli.signals)): for subtask_index in range(len(args_cli.signals)):
# subtask termination signal is false until subtask is complete, and true afterwards # subtask termination signal is false until subtask is complete, and true afterwards
subtask_signals = torch.ones(len(actions), dtype=torch.bool) subtask_signals = torch.ones(len(actions), dtype=torch.bool)
subtask_signals[: subtask_indices[subtask_index]] = False subtask_signals[: subtask_indices[subtask_index]] = False
annotated_episode.add( annotated_episode.add(
f"obs/subtask_term_signals/{args_cli.signals[subtask_index]}", subtask_signals f"obs/datagen_info/subtask_term_signals/{args_cli.signals[subtask_index]}", subtask_signals
) )
# set success to the recorded episode data and export to file # set success to the recorded episode data and export to file
......
...@@ -85,6 +85,7 @@ from isaaclab_mimic.datagen.data_generator import DataGenerator ...@@ -85,6 +85,7 @@ from isaaclab_mimic.datagen.data_generator import DataGenerator
from isaaclab_mimic.datagen.datagen_info_pool import DataGenInfoPool from isaaclab_mimic.datagen.datagen_info_pool import DataGenInfoPool
from isaaclab.devices import Se3Keyboard, Se3SpaceMouse from isaaclab.devices import Se3Keyboard, Se3SpaceMouse
from isaaclab.envs import ManagerBasedRLMimicEnv
from isaaclab.envs.mdp.recorders.recorders_cfg import ActionStateRecorderManagerCfg from isaaclab.envs.mdp.recorders.recorders_cfg import ActionStateRecorderManagerCfg
from isaaclab.managers import DatasetExportMode, RecorderTerm, RecorderTermCfg from isaaclab.managers import DatasetExportMode, RecorderTerm, RecorderTermCfg
from isaaclab.utils import configclass from isaaclab.utils import configclass
...@@ -104,11 +105,16 @@ class PreStepDatagenInfoRecorder(RecorderTerm): ...@@ -104,11 +105,16 @@ class PreStepDatagenInfoRecorder(RecorderTerm):
"""Recorder term that records the datagen info data in each step.""" """Recorder term that records the datagen info data in each step."""
def record_pre_step(self): def record_pre_step(self):
eef_pose_dict = {}
for eef_name in self._env.cfg.subtask_configs.keys():
eef_pose_dict[eef_name] = self._env.get_robot_eef_pose(eef_name)
datagen_info = { datagen_info = {
"object_pose": self._env.scene.get_state(is_relative=True)["rigid_object"], "object_pose": self._env.get_object_poses(),
"target_eef_pose": self._env.action_to_target_eef_pos(self._env.action_manager.action), "eef_pose": eef_pose_dict,
"target_eef_pose": self._env.action_to_target_eef_pose(self._env.action_manager.action),
} }
return "obs", datagen_info return "obs/datagen_info", datagen_info
@configclass @configclass
...@@ -122,7 +128,7 @@ class PreStepSubtaskTermsObservationsRecorder(RecorderTerm): ...@@ -122,7 +128,7 @@ class PreStepSubtaskTermsObservationsRecorder(RecorderTerm):
"""Recorder term that records the subtask completion observations in each step.""" """Recorder term that records the subtask completion observations in each step."""
def record_pre_step(self): def record_pre_step(self):
return "obs/subtask_term_signals", self._env.obs_buf["subtask_terms"] return "obs/datagen_info/subtask_term_signals", self._env.get_subtask_term_signals()
@configclass @configclass
...@@ -397,6 +403,15 @@ def main(): ...@@ -397,6 +403,15 @@ def main():
# create environment # create environment
env = gym.make(env_name, cfg=env_cfg) env = gym.make(env_name, cfg=env_cfg)
if not isinstance(env.unwrapped, ManagerBasedRLMimicEnv):
raise ValueError("The environment should be derived from ManagerBasedRLMimicEnv")
# check if the mimic API env.unwrapped.get_subtask_term_signals() is implemented
if env.unwrapped.get_subtask_term_signals.__func__ is ManagerBasedRLMimicEnv.get_subtask_term_signals:
raise NotImplementedError(
"The environment does not implement the get_subtask_term_signals method required to run this script."
)
# set seed for generation # set seed for generation
random.seed(env.unwrapped.cfg.datagen_config.seed) random.seed(env.unwrapped.cfg.datagen_config.seed)
np.random.seed(env.unwrapped.cfg.datagen_config.seed) np.random.seed(env.unwrapped.cfg.datagen_config.seed)
......
...@@ -3,6 +3,9 @@ ...@@ -3,6 +3,9 @@
# #
# SPDX-License-Identifier: BSD-3-Clause # SPDX-License-Identifier: BSD-3-Clause
import torch
from collections.abc import Sequence
import isaaclab.utils.math as PoseUtils import isaaclab.utils.math as PoseUtils
from isaaclab.envs import ManagerBasedRLEnv from isaaclab.envs import ManagerBasedRLEnv
...@@ -24,85 +27,104 @@ class ManagerBasedRLMimicEnv(ManagerBasedRLEnv): ...@@ -24,85 +27,104 @@ class ManagerBasedRLMimicEnv(ManagerBasedRLEnv):
proficient agents through imitation learning on diverse configurations of scenes, object instances, etc. proficient agents through imitation learning on diverse configurations of scenes, object instances, etc.
Key Features: Key Features:
- Efficient Dataset Generation: Utilizes a small set of human demos to produce large scale demonstrations. - Efficient Dataset Generation: Utilizes a small set of human demos to produce large scale demonstrations.
- Broad Applicability: Capable of supporting tasks that require a range of manipulation skills, such as - Broad Applicability: Capable of supporting tasks that require a range of manipulation skills, such as
pick-and-place and interacting with articulated objects. pick-and-place and interacting with articulated objects.
- Dataset Versatility: The synthetic data retains a quality that compares favorably with additional human demos. - Dataset Versatility: The synthetic data retains a quality that compares favorably with additional human demos.
""" """
def get_robot_eef_pose(self, env_ind=0): def get_robot_eef_pose(self, eef_name: str, env_ids: Sequence[int] | None = None) -> torch.Tensor:
""" """
Get current robot end effector pose. Should be the same frame as used by the robot end-effector controller. Get current robot end effector pose. Should be the same frame as used by the robot end-effector controller.
Args:
eef_name: Name of the end effector.
env_ids: Environment indices to get the pose for. If None, all envs are considered.
Returns: Returns:
pose (torch.Tensor): 4x4 eef pose matrix A torch.Tensor eef pose matrix. Shape is (len(env_ids), 4, 4)
""" """
raise NotImplementedError raise NotImplementedError
def target_eef_pose_to_action(self, target_eef_pose, relative=True, env_ind=0): def target_eef_pose_to_action(
self, target_eef_pose_dict: dict, gripper_action_dict: dict, noise: float | None = None, env_id: int = 0
) -> torch.Tensor:
""" """
Takes a target pose for the end effector controller and returns an action Takes a target pose and gripper action for the end effector controller and returns an action
to try and achieve that target pose. (usually a normalized delta pose action) to try and achieve that target pose.
Noise is added to the target pose action if specified.
Args: Args:
target_eef_pose (torch.Tensor): 4x4 target eef pose target_eef_pose_dict: Dictionary of 4x4 target eef pose for each end-effector.
relative (bool): if True, use relative pose actions, else absolute pose actions gripper_action_dict: Dictionary of gripper actions for each end-effector.
noise: Noise to add to the action. If None, no noise is added.
env_id: Environment index to compute the action for.
Returns: Returns:
action (torch.Tensor): action compatible with env.step (minus gripper actuation) An action torch.Tensor that's compatible with env.step().
""" """
raise NotImplementedError raise NotImplementedError
def action_to_target_eef_pos(self, action, relative=True, env_ind=0): def action_to_target_eef_pose(self, action: torch.Tensor) -> dict[str, torch.Tensor]:
""" """
Converts action (compatible with env.step) to a target pose for the end effector controller. Converts action (compatible with env.step) to a target pose for the end effector controller.
Inverse of @target_eef_pose_to_action. Usually used to infer a sequence of target controller poses Inverse of @target_eef_pose_to_action. Usually used to infer a sequence of target controller poses
from a demonstration trajectory using the recorded actions. from a demonstration trajectory using the recorded actions.
Args: Args:
action (torch.Tensor): environment action action: Environment action. Shape is (num_envs, action_dim).
relative (bool): if True, use relative pose actions, else absolute pose actions
Returns: Returns:
target_eef_pose (torch.Tensor): 4x4 target eef pose that @action corresponds to A dictionary of eef pose torch.Tensor that @action corresponds to.
""" """
raise NotImplementedError raise NotImplementedError
def action_to_gripper_action(self, action): def actions_to_gripper_actions(self, actions: torch.Tensor) -> dict[str, torch.Tensor]:
""" """
Extracts the gripper actuation part of an action (compatible with env.step). Extracts the gripper actuation part from a sequence of env actions (compatible with env.step).
Args: Args:
action (torch.Tensor): environment action actions: environment actions. The shape is (num_envs, num steps in a demo, action_dim).
Returns: Returns:
gripper_action (torch.Tensor): subset of environment action for gripper actuation A dictionary of torch.Tensor gripper actions. Key to each dict is an eef_name.
""" """
raise NotImplementedError raise NotImplementedError
def get_object_poses(self, env_ind=0): def get_object_poses(self, env_ids: Sequence[int] | None = None):
""" """
Gets the pose of each object relevant to Isaac Lab Mimic data generation in the current scene. Gets the pose of each object relevant to Isaac Lab Mimic data generation in the current scene.
Args:
env_ids: Environment indices to get the pose for. If None, all envs are considered.
Returns: Returns:
object_poses (dict): dictionary that maps object name (str) to object pose matrix (4x4 torch.Tensor) A dictionary that maps object names to object pose matrix (4x4 torch.Tensor)
""" """
if env_ids is None:
env_ids = slice(None)
rigid_object_states = self.scene.get_state(is_relative=True)["rigid_object"] rigid_object_states = self.scene.get_state(is_relative=True)["rigid_object"]
object_pose_matrix = dict() object_pose_matrix = dict()
for obj_name, obj_state in rigid_object_states.items(): for obj_name, obj_state in rigid_object_states.items():
object_pose_matrix[obj_name] = PoseUtils.make_pose( object_pose_matrix[obj_name] = PoseUtils.make_pose(
obj_state["root_pose"][env_ind, :3], PoseUtils.matrix_from_quat(obj_state["root_pose"][env_ind, 3:7]) obj_state["root_pose"][env_ids, :3], PoseUtils.matrix_from_quat(obj_state["root_pose"][env_ids, 3:7])
) )
return object_pose_matrix return object_pose_matrix
def get_subtask_term_signals(self, env_ind=0): def get_subtask_term_signals(self, env_ids: Sequence[int] | None = None) -> dict[str, torch.Tensor]:
""" """
Gets a dictionary of binary flags for each subtask in a task. The flag is 1 Gets a dictionary of termination signal flags for each subtask in a task. The flag is 1
when the subtask has been completed and 0 otherwise. Isaac Lab Mimic only uses this when the subtask has been completed and 0 otherwise. The implementation of this method is
when parsing source demonstrations at the start of data generation, and it only required if intending to enable automatic subtask term signal annotation when running the
uses the first 0 -> 1 transition in this signal to detect the end of a subtask. dataset annotation tool. This method can be kept unimplemented if intending to use manual
subtask term signal annotation.
Args:
env_ids: Environment indices to get the termination signals for. If None, all envs are considered.
Returns: Returns:
subtask_term_signals (dict): dictionary that maps subtask name to termination flag (0 or 1) A dictionary termination signal flags (False or True) for each subtask.
""" """
raise NotImplementedError raise NotImplementedError
......
...@@ -17,43 +17,135 @@ from isaaclab.utils import configclass ...@@ -17,43 +17,135 @@ from isaaclab.utils import configclass
class DataGenConfig: class DataGenConfig:
"""Configuration settings for data generation processes within the Isaac Lab Mimic environment.""" """Configuration settings for data generation processes within the Isaac Lab Mimic environment."""
name: str = "demo" # The name of the datageneration, default is "demo" # The name of the datageneration, default is "demo"
source_dataset_path: str = None # Path to the source dataset for mimic generation name: str = "demo"
generation_path: str = None # Path where the generated data will be saved
generation_guarantee: bool = False # Whether to guarantee generation of data (e.g., retry until successful) # If set to True, generation will be retried until
generation_keep_failed: bool = True # Whether to keep failed generation trials # generation_num_trials successful demos have been generated.
generation_num_trials: int = 10 # Number of trial to be generated # If set to False, generation will stop after generation_num_trails,
generation_select_src_per_subtask: bool = False # Whether to select source data per subtask # independent of whether they were all successful or not.
generation_transform_first_robot_pose: bool = False # Whether to transform the first robot pose during generation generation_guarantee: bool = True
generation_interpolate_from_last_target_pose: bool = True # Whether to interpolate from last target pose
task_name: str = None # Name of the task being configured ##############################################################
max_num_failures: int = 50 # Maximum number of failures allowed before stopping generation # Debugging parameters, which can help determining low success
num_demo_to_render: int = 50 # Number of demonstrations to render # rates.
num_fail_demo_to_render: int = 50 # Number of failed demonstrations to render
seed: int = 1 # Seed for randomization to ensure reproducibility # Whether to keep failed generation trials. Keeping failed
# demonstrations is useful for visualizing and debugging low
# success rates.
generation_keep_failed: bool = False
# Maximum number of failures allowed before stopping generation
max_num_failures: int = 50
# Seed for randomization to ensure reproducibility
seed: int = 1
##############################################################
# The following values can be changed on the command line, and
# only serve as defaults.
# Path to the source dataset for mimic generation
source_dataset_path: str = None
# Path where the generated data will be saved
generation_path: str = None
# Number of trial to be generated
generation_num_trials: int = 10
# Name of the task being configured
task_name: str = None
##############################################################
# Advanced configuration, does not usually need to be changed
# Whether to select source data per subtask
# Note: this requires subtasks to be properly temporally
# constrained, and may require additional subtasks to allow
# for time synchronization.
generation_select_src_per_subtask: bool = False
# Whether to transform the first robot pose during generation
generation_transform_first_robot_pose: bool = False
# Whether to interpolate from last target pose
generation_interpolate_from_last_target_pose: bool = True
@configclass @configclass
class SubTaskConfig: class SubTaskConfig:
"""Configuration settings specific to the management of individual subtasks.""" """
Configuration settings specific to the management of individual
subtasks.
"""
##############################################################
# Mandatory options that should be defined for every subtask
# Reference to the object involved in this subtask, None if no
# object is involved (this is rarely the case).
object_ref: str = None
# Signal for subtask termination
subtask_term_signal: str = None
##############################################################
# Advanced options for tuning the generation results
# Strategy on how to select a subtask segment. Can be either
# 'random', 'nearest_neighbor_object' or
# 'nearest_neighbor_robot_distance'. Details can be found in
# source/isaaclab_mimic/isaaclab_mimic/datagen/selection_strategy.py
#
# Note: for 'nearest_neighbor_object' and
# 'nearest_neighbor_robot_distance', the subtask needs to have
# 'object_ref' set to a value other than 'None' above. At the
# same time, if 'object_ref' is not 'None', then either of
# those strategies will usually yield higher success rates
# than the default 'random' strategy.
selection_strategy: str = "random"
# Additional arguments to the selected strategy. See details on
# each strategy in
# source/isaaclab_mimic/isaaclab_mimic/datagen/selection_strategy.py
# Arguments will be passed through to the `select_source_demo`
# method.
selection_strategy_kwargs: dict = {}
object_ref: str = None # Reference to the object involved in this subtask # Range for start offset of the first subtask
subtask_term_signal: str = None # Signal for subtask termination first_subtask_start_offset_range: tuple = (0, 0)
subtask_term_offset_range: tuple = (0, 0) # Range for offsetting subtask termination
selection_strategy: str = None # Strategy for selecting subtask # Range for offsetting subtask termination
selection_strategy_kwargs: dict = {} # Keyword arguments for the selection strategy subtask_term_offset_range: tuple = (0, 0)
action_noise: float = 0.03 # Amplitude of action noise applied
num_interpolation_steps: int = 5 # Number of steps for interpolation between waypoints # Amplitude of action noise applied
num_fixed_steps: int = 0 # Number of fixed steps for the subtask action_noise: float = 0.03
apply_noise_during_interpolation: bool = False # Whether to apply noise during interpolation
# Number of steps for interpolation between waypoints
num_interpolation_steps: int = 5
# Number of fixed steps for the subtask
num_fixed_steps: int = 0
# Whether to apply noise during interpolation
apply_noise_during_interpolation: bool = False
@configclass @configclass
class MimicEnvCfg: class MimicEnvCfg:
"""Configuration class for the Mimic environment integration. """
Configuration class for the Mimic environment integration.
This class consolidates various configuration aspects for the Isaac Lab Mimic data generation pipeline. This class consolidates various configuration aspects for the
Isaac Lab Mimic data generation pipeline.
""" """
datagen_config: DataGenConfig = DataGenConfig() # Configuration for the data generation # Overall configuration for the data generation
subtask_configs: list[SubTaskConfig] = [] # List of configurations for each subtask datagen_config: DataGenConfig = DataGenConfig()
# Dictionary of list of subtask configurations for each end-effector.
# Keys are end-effector names.
# Currently, only a single end-effector is supported by Isaac Lab Mimic
# so `subtask_configs` must always be of size 1.
subtask_configs: dict[str, list[SubTaskConfig]] = {}
...@@ -47,9 +47,14 @@ class DataGenerator: ...@@ -47,9 +47,14 @@ class DataGenerator:
assert isinstance(self.env_cfg, MimicEnvCfg) assert isinstance(self.env_cfg, MimicEnvCfg)
self.dataset_path = dataset_path self.dataset_path = dataset_path
if len(self.env_cfg.subtask_configs) != 1:
raise ValueError("Data generation currently supports only one end-effector.")
(self.eef_name,) = self.env_cfg.subtask_configs.keys()
(self.subtask_configs,) = self.env_cfg.subtask_configs.values()
# sanity check on task spec offset ranges - final subtask should not have any offset randomization # sanity check on task spec offset ranges - final subtask should not have any offset randomization
assert self.env_cfg.subtask_configs[-1].subtask_term_offset_range[0] == 0 assert self.subtask_configs[-1].subtask_term_offset_range[0] == 0
assert self.env_cfg.subtask_configs[-1].subtask_term_offset_range[1] == 0 assert self.subtask_configs[-1].subtask_term_offset_range[1] == 0
self.demo_keys = demo_keys self.demo_keys = demo_keys
...@@ -88,8 +93,8 @@ class DataGenerator: ...@@ -88,8 +93,8 @@ class DataGenerator:
# add them to subtask end indices, and then set them as the start indices of next subtask too # add them to subtask end indices, and then set them as the start indices of next subtask too
for i in range(src_subtask_indices.shape[1] - 1): for i in range(src_subtask_indices.shape[1] - 1):
end_offsets = np.random.randint( end_offsets = np.random.randint(
low=self.env_cfg.subtask_configs[i].subtask_term_offset_range[0], low=self.subtask_configs[i].subtask_term_offset_range[0],
high=self.env_cfg.subtask_configs[i].subtask_term_offset_range[1] + 1, high=self.subtask_configs[i].subtask_term_offset_range[1] + 1,
size=src_subtask_indices.shape[0], size=src_subtask_indices.shape[0],
) )
src_subtask_indices[:, i, 1] = src_subtask_indices[:, i, 1] + end_offsets src_subtask_indices[:, i, 1] = src_subtask_indices[:, i, 1] + end_offsets
...@@ -235,6 +240,8 @@ class DataGenerator: ...@@ -235,6 +240,8 @@ class DataGenerator:
src_demo_inds (list): list of selected source demonstration indices for each subtask src_demo_inds (list): list of selected source demonstration indices for each subtask
src_demo_labels (np.array): same as @src_demo_inds, but repeated to have a label for each timestep of the trajectory src_demo_labels (np.array): same as @src_demo_inds, but repeated to have a label for each timestep of the trajectory
""" """
eef_names = list(self.env_cfg.subtask_configs.keys())
eef_name = eef_names[0]
# reset the env to create a new task demo instance # reset the env to create a new task demo instance
env_id_tensor = torch.tensor([env_id], dtype=torch.int64, device=self.env.device) env_id_tensor = torch.tensor([env_id], dtype=torch.int64, device=self.env.device)
...@@ -257,17 +264,17 @@ class DataGenerator: ...@@ -257,17 +264,17 @@ class DataGenerator:
) # like @generated_src_demo_inds, but padded to align with size of @generated_actions ) # like @generated_src_demo_inds, but padded to align with size of @generated_actions
prev_src_demo_datagen_info_pool_size = 0 prev_src_demo_datagen_info_pool_size = 0
for subtask_ind in range(len(self.env_cfg.subtask_configs)): for subtask_ind in range(len(self.subtask_configs)):
# some things only happen on first subtask # some things only happen on first subtask
is_first_subtask = subtask_ind == 0 is_first_subtask = subtask_ind == 0
# name of object for this subtask # name of object for this subtask
subtask_object_name = self.env_cfg.subtask_configs[subtask_ind].object_ref subtask_object_name = self.subtask_configs[subtask_ind].object_ref
# corresponding current object pose # corresponding current object pose
cur_object_pose = ( cur_object_pose = (
self.env.get_object_poses(env_ind=env_id)[subtask_object_name] self.env.get_object_poses(env_ids=[env_id])[subtask_object_name][0]
if (subtask_object_name is not None) if (subtask_object_name is not None)
else None else None
) )
...@@ -288,13 +295,13 @@ class DataGenerator: ...@@ -288,13 +295,13 @@ class DataGenerator:
# Run source demo selection or use selected demo from previous iteration # Run source demo selection or use selected demo from previous iteration
if need_source_demo_selection: if need_source_demo_selection:
selected_src_demo_ind = self.select_source_demo( selected_src_demo_ind = self.select_source_demo(
eef_pose=self.env.get_robot_eef_pose(env_ind=env_id), eef_pose=self.env.get_robot_eef_pose(eef_name, env_ids=[env_id])[0],
object_pose=cur_object_pose, object_pose=cur_object_pose,
subtask_ind=subtask_ind, subtask_ind=subtask_ind,
src_subtask_inds=all_subtask_inds[:, subtask_ind], src_subtask_inds=all_subtask_inds[:, subtask_ind],
subtask_object_name=subtask_object_name, subtask_object_name=subtask_object_name,
selection_strategy_name=self.env_cfg.subtask_configs[subtask_ind].selection_strategy, selection_strategy_name=self.subtask_configs[subtask_ind].selection_strategy,
selection_strategy_kwargs=self.env_cfg.subtask_configs[subtask_ind].selection_strategy_kwargs, selection_strategy_kwargs=self.subtask_configs[subtask_ind].selection_strategy_kwargs,
) )
assert selected_src_demo_ind is not None assert selected_src_demo_ind is not None
...@@ -356,17 +363,19 @@ class DataGenerator: ...@@ -356,17 +363,19 @@ class DataGenerator:
else: else:
# Interpolation segment will start from current robot eef pose. # Interpolation segment will start from current robot eef pose.
init_sequence = WaypointSequence.from_poses( init_sequence = WaypointSequence.from_poses(
poses=self.env.get_robot_eef_pose(env_ind=env_id)[None], eef_names=eef_names,
poses=self.env.get_robot_eef_pose(eef_name, env_ids=[env_id])[0][None],
gripper_actions=src_subtask_gripper_actions[0:1], gripper_actions=src_subtask_gripper_actions[0:1],
action_noise=self.env_cfg.subtask_configs[subtask_ind].action_noise, action_noise=self.subtask_configs[subtask_ind].action_noise,
) )
traj_to_execute.add_waypoint_sequence(init_sequence) traj_to_execute.add_waypoint_sequence(init_sequence)
# Construct trajectory for the transformed segment. # Construct trajectory for the transformed segment.
transformed_seq = WaypointSequence.from_poses( transformed_seq = WaypointSequence.from_poses(
eef_names=eef_names,
poses=transformed_eef_poses, poses=transformed_eef_poses,
gripper_actions=src_subtask_gripper_actions, gripper_actions=src_subtask_gripper_actions,
action_noise=self.env_cfg.subtask_configs[subtask_ind].action_noise, action_noise=self.subtask_configs[subtask_ind].action_noise,
) )
transformed_traj = WaypointTrajectory() transformed_traj = WaypointTrajectory()
transformed_traj.add_waypoint_sequence(transformed_seq) transformed_traj.add_waypoint_sequence(transformed_seq)
...@@ -375,11 +384,12 @@ class DataGenerator: ...@@ -375,11 +384,12 @@ class DataGenerator:
# Interpolation will happen from the initial pose (@init_sequence) to the first element of @transformed_seq. # Interpolation will happen from the initial pose (@init_sequence) to the first element of @transformed_seq.
traj_to_execute.merge( traj_to_execute.merge(
transformed_traj, transformed_traj,
num_steps_interp=self.env_cfg.subtask_configs[subtask_ind].num_interpolation_steps, eef_names=eef_names,
num_steps_fixed=self.env_cfg.subtask_configs[subtask_ind].num_fixed_steps, num_steps_interp=self.subtask_configs[subtask_ind].num_interpolation_steps,
num_steps_fixed=self.subtask_configs[subtask_ind].num_fixed_steps,
action_noise=( action_noise=(
float(self.env_cfg.subtask_configs[subtask_ind].apply_noise_during_interpolation) float(self.subtask_configs[subtask_ind].apply_noise_during_interpolation)
* self.env_cfg.subtask_configs[subtask_ind].action_noise * self.subtask_configs[subtask_ind].action_noise
), ),
) )
......
...@@ -36,9 +36,13 @@ class DataGenInfoPool: ...@@ -36,9 +36,13 @@ class DataGenInfoPool:
self._asyncio_lock = asyncio_lock self._asyncio_lock = asyncio_lock
self.subtask_term_signals = [subtask_config.subtask_term_signal for subtask_config in env_cfg.subtask_configs] if len(env_cfg.subtask_configs) != 1:
raise ValueError("Data generation currently supports only one end-effector.")
(subtask_configs,) = env_cfg.subtask_configs.values()
self.subtask_term_signals = [subtask_config.subtask_term_signal for subtask_config in subtask_configs]
self.subtask_term_offset_ranges = [ self.subtask_term_offset_ranges = [
subtask_config.subtask_term_offset_range for subtask_config in env_cfg.subtask_configs subtask_config.subtask_term_offset_range for subtask_config in subtask_configs
] ]
@property @property
...@@ -82,41 +86,49 @@ class DataGenInfoPool: ...@@ -82,41 +86,49 @@ class DataGenInfoPool:
episode (EpisodeData): episode to add episode (EpisodeData): episode to add
""" """
ep_grp = episode.data ep_grp = episode.data
eef_name = list(self.env.cfg.subtask_configs.keys())[0]
# extract datagen info # extract datagen info
# Extract eef poses if "datagen_info" in ep_grp["obs"]:
eef_pos = ep_grp["obs"]["eef_pos"] eef_pose = ep_grp["obs"]["datagen_info"]["eef_pose"][eef_name]
# format (w, x, y, z) object_poses_dict = ep_grp["obs"]["datagen_info"]["object_pose"]
eef_quat = ep_grp["obs"]["eef_quat"] target_eef_pose = ep_grp["obs"]["datagen_info"]["target_eef_pose"][eef_name]
subtask_term_signals_dict = ep_grp["obs"]["datagen_info"]["subtask_term_signals"]
eef_rot_matrices = PoseUtils.matrix_from_quat(eef_quat) # shape (N, 3, 3) else:
# Extract eef poses
# Create pose matrices for all environments eef_pos = ep_grp["obs"]["eef_pos"]
eef_pose = PoseUtils.make_pose(eef_pos, eef_rot_matrices) # shape (N, 4, 4) eef_quat = ep_grp["obs"]["eef_quat"] # format (w, x, y, z)
eef_rot_matrices = PoseUtils.matrix_from_quat(eef_quat) # shape (N, 3, 3)
object_poses_dict = dict() # Create pose matrices for all environments
# TODO: change object_pose key in the dataset to object_state since it is not just the pose eef_pose = PoseUtils.make_pose(eef_pos, eef_rot_matrices) # shape (N, 4, 4)
for object_name, value in ep_grp["obs"]["object_pose"].items():
# object_pose # Object poses
value = value["root_pose"] object_poses_dict = dict()
# Root state ``[pos, quat, lin_vel, ang_vel]`` in simulation world frame. Shape is (num_steps, 13). for object_name, value in ep_grp["obs"]["object_pose"].items():
# Quaternion ordering is wxyz # object_pose
value = value["root_pose"]
# Convert to rotation matrices # Root state ``[pos, quat, lin_vel, ang_vel]`` in simulation world frame. Shape is (num_steps, 13).
object_rot_matrices = PoseUtils.matrix_from_quat(value[:, 3:7]) # shape (N, 3, 3) # Quaternion ordering is wxyz
object_rot_positions = value[:, 0:3] # shape (N, 3) # Convert to rotation matrices
object_rot_matrices = PoseUtils.matrix_from_quat(value[:, 3:7]) # shape (N, 3, 3)
object_poses_dict[object_name] = PoseUtils.make_pose(object_rot_positions, object_rot_matrices) object_rot_positions = value[:, 0:3] # shape (N, 3)
object_poses_dict[object_name] = PoseUtils.make_pose(object_rot_positions, object_rot_matrices)
# Target eef pose
target_eef_pose = ep_grp["obs"]["target_eef_pose"]
# Subtask termination signalsS
subtask_term_signals_dict = (ep_grp["obs"]["subtask_term_signals"],)
# Extract gripper actions # Extract gripper actions
gripper_actions = self.env.action_to_gripper_action(ep_grp["actions"]) gripper_actions = self.env.actions_to_gripper_actions(ep_grp["actions"])[eef_name]
ep_datagen_info_obj = DatagenInfo( ep_datagen_info_obj = DatagenInfo(
eef_pose=eef_pose, eef_pose=eef_pose,
object_poses=object_poses_dict, object_poses=object_poses_dict,
subtask_term_signals=ep_grp["obs"]["subtask_term_signals"], subtask_term_signals=subtask_term_signals_dict,
target_eef_pose=ep_grp["obs"]["target_eef_pose"], target_eef_pose=target_eef_pose,
gripper_action=gripper_actions, gripper_action=gripper_actions,
) )
self._datagen_infos.append(ep_datagen_info_obj) self._datagen_infos.append(ep_datagen_info_obj)
......
...@@ -18,7 +18,7 @@ class Waypoint: ...@@ -18,7 +18,7 @@ class Waypoint:
Represents a single desired 6-DoF waypoint, along with corresponding gripper actuation for this point. Represents a single desired 6-DoF waypoint, along with corresponding gripper actuation for this point.
""" """
def __init__(self, pose, gripper_action, noise=None): def __init__(self, eef_names, pose, gripper_action, noise=None):
""" """
Args: Args:
pose (torch.Tensor): 4x4 pose target for robot controller pose (torch.Tensor): 4x4 pose target for robot controller
...@@ -26,10 +26,10 @@ class Waypoint: ...@@ -26,10 +26,10 @@ class Waypoint:
noise (float or None): action noise amplitude to apply during execution at this timestep noise (float or None): action noise amplitude to apply during execution at this timestep
(for arm actions, not gripper actions) (for arm actions, not gripper actions)
""" """
self.eef_names = eef_names
self.pose = pose self.pose = pose
self.gripper_action = gripper_action self.gripper_action = gripper_action
self.noise = noise self.noise = noise
assert len(self.gripper_action.shape) == 1
def __str__(self): def __str__(self):
"""String representation of the waypoint.""" """String representation of the waypoint."""
...@@ -54,7 +54,7 @@ class WaypointSequence: ...@@ -54,7 +54,7 @@ class WaypointSequence:
self.sequence = deepcopy(sequence) self.sequence = deepcopy(sequence)
@classmethod @classmethod
def from_poses(cls, poses, gripper_actions, action_noise): def from_poses(cls, eef_names, poses, gripper_actions, action_noise):
""" """
Instantiate a WaypointSequence object given a sequence of poses, Instantiate a WaypointSequence object given a sequence of poses,
gripper actions, and action noise. gripper actions, and action noise.
...@@ -79,6 +79,7 @@ class WaypointSequence: ...@@ -79,6 +79,7 @@ class WaypointSequence:
# make WaypointSequence instance # make WaypointSequence instance
sequence = [ sequence = [
Waypoint( Waypoint(
eef_names=eef_names,
pose=poses[t], pose=poses[t],
gripper_action=gripper_actions[t], gripper_action=gripper_actions[t],
noise=action_noise[t, 0], noise=action_noise[t, 0],
...@@ -201,6 +202,7 @@ class WaypointTrajectory: ...@@ -201,6 +202,7 @@ class WaypointTrajectory:
def add_waypoint_sequence_for_target_pose( def add_waypoint_sequence_for_target_pose(
self, self,
eef_names,
pose, pose,
gripper_action, gripper_action,
num_steps, num_steps,
...@@ -252,6 +254,7 @@ class WaypointTrajectory: ...@@ -252,6 +254,7 @@ class WaypointTrajectory:
# add waypoint sequence for this set of poses # add waypoint sequence for this set of poses
sequence = WaypointSequence.from_poses( sequence = WaypointSequence.from_poses(
eef_names=eef_names,
poses=poses, poses=poses,
gripper_actions=gripper_actions, gripper_actions=gripper_actions,
action_noise=action_noise, action_noise=action_noise,
...@@ -278,6 +281,7 @@ class WaypointTrajectory: ...@@ -278,6 +281,7 @@ class WaypointTrajectory:
def merge( def merge(
self, self,
other, other,
eef_names,
num_steps_interp=None, num_steps_interp=None,
num_steps_fixed=None, num_steps_fixed=None,
action_noise=0.0, action_noise=0.0,
...@@ -311,6 +315,7 @@ class WaypointTrajectory: ...@@ -311,6 +315,7 @@ class WaypointTrajectory:
if need_interp: if need_interp:
# interpolation segment # interpolation segment
self.add_waypoint_sequence_for_target_pose( self.add_waypoint_sequence_for_target_pose(
eef_names=eef_names,
pose=target_for_interpolation.pose, pose=target_for_interpolation.pose,
gripper_action=target_for_interpolation.gripper_action, gripper_action=target_for_interpolation.gripper_action,
num_steps=num_steps_interp, num_steps=num_steps_interp,
...@@ -324,6 +329,7 @@ class WaypointTrajectory: ...@@ -324,6 +329,7 @@ class WaypointTrajectory:
# account for the fact that we pop'd the first element of @other in anticipation of an interpolation segment # account for the fact that we pop'd the first element of @other in anticipation of an interpolation segment
num_steps_fixed_to_use = num_steps_fixed if need_interp else (num_steps_fixed + 1) num_steps_fixed_to_use = num_steps_fixed if need_interp else (num_steps_fixed + 1)
self.add_waypoint_sequence_for_target_pose( self.add_waypoint_sequence_for_target_pose(
eef_names=eef_names,
pose=target_for_interpolation.pose, pose=target_for_interpolation.pose,
gripper_action=target_for_interpolation.gripper_action, gripper_action=target_for_interpolation.gripper_action,
num_steps=num_steps_fixed_to_use, num_steps=num_steps_fixed_to_use,
...@@ -382,17 +388,15 @@ class WaypointTrajectory: ...@@ -382,17 +388,15 @@ class WaypointTrajectory:
obs = env.obs_buf obs = env.obs_buf
state = env.scene.get_state(is_relative=True) state = env.scene.get_state(is_relative=True)
# convert target pose to arm action # convert target pose and gripper action to env action
action_pose = env.target_eef_pose_to_action(target_eef_pose=waypoint.pose, env_ind=env_id) target_eef_pose_dict = {waypoint.eef_names[0]: waypoint.pose}
gripper_action_dict = {waypoint.eef_names[0]: waypoint.gripper_action}
# maybe add noise to action using torch.randn play_action = env.target_eef_pose_to_action(
if waypoint.noise is not None: target_eef_pose_dict=target_eef_pose_dict,
noise = waypoint.noise * torch.randn_like(action_pose) gripper_action_dict=gripper_action_dict,
action_pose += noise noise=waypoint.noise,
action_pose = torch.clamp(action_pose, -1.0, 1.0) env_id=env_id,
)
# add in gripper action
play_action = torch.cat([action_pose, waypoint.gripper_action], dim=0)
# step environment # step environment
if not isinstance(play_action, torch.Tensor): if not isinstance(play_action, torch.Tensor):
......
...@@ -31,12 +31,11 @@ class FrankaCubeStackIKRelMimicEnvCfg(FrankaCubeStackEnvCfg, MimicEnvCfg): ...@@ -31,12 +31,11 @@ class FrankaCubeStackIKRelMimicEnvCfg(FrankaCubeStackEnvCfg, MimicEnvCfg):
self.datagen_config.generation_transform_first_robot_pose = False self.datagen_config.generation_transform_first_robot_pose = False
self.datagen_config.generation_interpolate_from_last_target_pose = True self.datagen_config.generation_interpolate_from_last_target_pose = True
self.datagen_config.max_num_failures = 25 self.datagen_config.max_num_failures = 25
self.datagen_config.num_demo_to_render = 10
self.datagen_config.num_fail_demo_to_render = 25
self.datagen_config.seed = 1 self.datagen_config.seed = 1
# The following are the subtask configurations for the stack task. # The following are the subtask configurations for the stack task.
self.subtask_configs.append( subtask_configs = []
subtask_configs.append(
SubTaskConfig( SubTaskConfig(
# Each subtask involves manipulation with respect to a single object frame. # Each subtask involves manipulation with respect to a single object frame.
object_ref="cube_2", object_ref="cube_2",
...@@ -60,7 +59,7 @@ class FrankaCubeStackIKRelMimicEnvCfg(FrankaCubeStackEnvCfg, MimicEnvCfg): ...@@ -60,7 +59,7 @@ class FrankaCubeStackIKRelMimicEnvCfg(FrankaCubeStackEnvCfg, MimicEnvCfg):
apply_noise_during_interpolation=False, apply_noise_during_interpolation=False,
) )
) )
self.subtask_configs.append( subtask_configs.append(
SubTaskConfig( SubTaskConfig(
# Each subtask involves manipulation with respect to a single object frame. # Each subtask involves manipulation with respect to a single object frame.
object_ref="cube_1", object_ref="cube_1",
...@@ -82,7 +81,7 @@ class FrankaCubeStackIKRelMimicEnvCfg(FrankaCubeStackEnvCfg, MimicEnvCfg): ...@@ -82,7 +81,7 @@ class FrankaCubeStackIKRelMimicEnvCfg(FrankaCubeStackEnvCfg, MimicEnvCfg):
apply_noise_during_interpolation=False, apply_noise_during_interpolation=False,
) )
) )
self.subtask_configs.append( subtask_configs.append(
SubTaskConfig( SubTaskConfig(
# Each subtask involves manipulation with respect to a single object frame. # Each subtask involves manipulation with respect to a single object frame.
object_ref="cube_3", object_ref="cube_3",
...@@ -104,7 +103,7 @@ class FrankaCubeStackIKRelMimicEnvCfg(FrankaCubeStackEnvCfg, MimicEnvCfg): ...@@ -104,7 +103,7 @@ class FrankaCubeStackIKRelMimicEnvCfg(FrankaCubeStackEnvCfg, MimicEnvCfg):
apply_noise_during_interpolation=False, apply_noise_during_interpolation=False,
) )
) )
self.subtask_configs.append( subtask_configs.append(
SubTaskConfig( SubTaskConfig(
# Each subtask involves manipulation with respect to a single object frame. # Each subtask involves manipulation with respect to a single object frame.
object_ref="cube_2", object_ref="cube_2",
...@@ -126,3 +125,4 @@ class FrankaCubeStackIKRelMimicEnvCfg(FrankaCubeStackEnvCfg, MimicEnvCfg): ...@@ -126,3 +125,4 @@ class FrankaCubeStackIKRelMimicEnvCfg(FrankaCubeStackEnvCfg, MimicEnvCfg):
apply_noise_during_interpolation=False, apply_noise_during_interpolation=False,
) )
) )
self.subtask_configs["franka"] = subtask_configs
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment