Commit 0301b826 authored by peterd-NV's avatar peterd-NV Committed by Kelly Guo

Updates training and play scripts for robomimic workflow (#150)

<!--
Thank you for your interest in sending a pull request. Please make sure
to check the contribution guidelines.

Link: https://isaac-sim.github.io/IsaacLab/source/refs/contributing.html
-->

Update robomimic training and play scripts.

<!-- As a practice, it is recommended to open an issue to have
discussions on the proposed pull request.
This makes it easier for the community to keep track of what is being
developed or added, and if a given feature
is demanded by more than one party. -->

<!-- As you go through the list, delete the ones that are not
applicable. -->

- New feature (non-breaking change which adds functionality)
- This change requires a documentation update

Please attach before and after screenshots of the change if applicable.

<!--
Example:

| Before | After |
| ------ | ----- |
| _gif/png before_ | _gif/png after_ |

To upload images to a PR -- simply drag and drop an image while in edit
mode and it should upload the image directly. You can then paste that
source into the above before/after sections.
-->

- [x] I have run the [`pre-commit` checks](https://pre-commit.com/) with
`./isaaclab.sh --format`
- [ ] I have made corresponding changes to the documentation
- [x] My changes generate no new warnings
- [ ] I have added tests that prove my fix is effective or that my
feature works
- [ ] I have updated the changelog and the corresponding version in the
extension's `config/extension.toml` file
- [ ] I have added my name to the `CONTRIBUTORS.md` or my name already
exists there

<!--
As you go through the checklist above, you can mark something as done by
putting an x character in it

For example,
- [x] I have done this task
- [ ] I have not done this task
-->

---------
Signed-off-by: 's avatarpeterd-NV <peterd@nvidia.com>
Co-authored-by: 's avatarCY Chen <cyc@nvidia.com>
Co-authored-by: 's avataroahmednv <oahmed@Nvidia.com>
Co-authored-by: 's avatarToni-SM <aserranomuno@nvidia.com>
Co-authored-by: 's avatarKelly Guo <kellyg@nvidia.com>
parent a2c0cdbd
......@@ -73,7 +73,6 @@ Changed
* Changed manager-based vision cartpole environment names from Isaac-Cartpole-RGB-Camera-v0
and Isaac-Cartpole-Depth-Camera-v0 to Isaac-Cartpole-RGB-v0 and Isaac-Cartpole-Depth-v0
0.10.13 (2024-10-28)
~~~~~~~~~~~~~~~~~~~~
......
......@@ -3,7 +3,7 @@
#
# SPDX-License-Identifier: BSD-3-Clause
"""Script to run a trained policy from robomimic."""
"""Script to play and evaluate a trained policy from robomimic."""
"""Launch Isaac Sim Simulator first."""
......@@ -12,12 +12,16 @@ import argparse
from omni.isaac.lab.app import AppLauncher
# add argparse arguments
parser = argparse.ArgumentParser(description="Play policy trained using robomimic for Isaac Lab environments.")
parser = argparse.ArgumentParser(description="Evaluate robomimic policy for Isaac Lab environment.")
parser.add_argument(
"--disable_fabric", action="store_true", default=False, help="Disable fabric and use USD I/O operations."
)
parser.add_argument("--task", type=str, default=None, help="Name of the task.")
parser.add_argument("--checkpoint", type=str, default=None, help="Pytorch model checkpoint to load.")
parser.add_argument("--horizon", type=int, default=800, help="Step horizon of each rollout.")
parser.add_argument("--num_rollouts", type=int, default=1, help="Number of rollouts.")
parser.add_argument("--seed", type=int, default=101, help="Random seed.")
# append AppLauncher cli args
AppLauncher.add_app_launcher_args(parser)
# parse the arguments
......@@ -32,47 +36,83 @@ simulation_app = app_launcher.app
import gymnasium as gym
import torch
import robomimic # noqa: F401
import robomimic.utils.file_utils as FileUtils
import robomimic.utils.torch_utils as TorchUtils
import omni.isaac.lab_tasks # noqa: F401
from omni.isaac.lab_tasks.utils import parse_env_cfg
def rollout(policy, env, horizon, device):
policy.start_episode
obs_dict, _ = env.reset()
traj = dict(actions=[], obs=[], next_obs=[])
for i in range(horizon):
# Prepare observations
obs = obs_dict["policy"]
for ob in obs:
obs[ob] = torch.squeeze(obs[ob])
traj["obs"].append(obs)
# Compute actions
actions = policy(obs)
actions = torch.from_numpy(actions).to(device=device).view(1, env.action_space.shape[1])
# Apply actions
obs_dict, _, terminated, truncated, _ = env.step(actions)
obs = obs_dict["policy"]
# Record trajectory
traj["actions"].append(actions.tolist())
traj["next_obs"].append(obs)
if terminated:
return True, traj
elif truncated:
return False, traj
return False, traj
def main():
"""Run a trained policy from robomimic with Isaac Lab environment."""
# parse configuration
env_cfg = parse_env_cfg(args_cli.task, device=args_cli.device, num_envs=1, use_fabric=not args_cli.disable_fabric)
# we want to have the terms in the observations returned as a dictionary
# rather than a concatenated tensor
# Set observations to dictionary mode for Robomimic
env_cfg.observations.policy.concatenate_terms = False
# create environment
# Set termination conditions
env_cfg.terminations.time_out = None
# Disable recorder
env_cfg.recorders = None
# Create environment
env = gym.make(args_cli.task, cfg=env_cfg)
# acquire device
# Set seed
torch.manual_seed(args_cli.seed)
env.seed(args_cli.seed)
# Acquire device
device = TorchUtils.get_torch_device(try_to_use_cuda=True)
# restore policy
# Load policy
policy, _ = FileUtils.policy_from_checkpoint(ckpt_path=args_cli.checkpoint, device=device, verbose=True)
# reset environment
obs_dict, _ = env.reset()
# robomimic only cares about policy observations
obs = obs_dict["policy"]
# simulate environment
while simulation_app.is_running():
# run everything in inference mode
with torch.inference_mode():
# compute actions
actions = policy(obs)
actions = torch.from_numpy(actions).to(device=device).view(1, env.action_space.shape[1])
# apply actions
obs_dict = env.step(actions)[0]
# robomimic only cares about policy observations
obs = obs_dict["policy"]
# Run policy
results = []
for trial in range(args_cli.num_rollouts):
print(f"[INFO] Starting trial {trial}")
terminated, traj = rollout(policy, env, args_cli.horizon, device)
results.append(terminated)
print(f"[INFO] Trial {trial}: {terminated}\n")
print(f"\nSuccessful trials: {results.count(True)}, out of {len(results)} trials")
print(f"Success rate: {results.count(True) / len(results)}")
print(f"Trial Results: {results}\n")
# close the simulator
env.close()
......
......@@ -36,7 +36,6 @@ Args:
This file has been modified from the original version in the following ways:
* Added import of AppLauncher from omni.isaac.lab.app to resolve the configuration to load for training.
"""
"""Launch Isaac Sim Simulator first."""
......@@ -67,7 +66,7 @@ import robomimic.utils.file_utils as FileUtils
import robomimic.utils.obs_utils as ObsUtils
import robomimic.utils.torch_utils as TorchUtils
import robomimic.utils.train_utils as TrainUtils
from robomimic.algo import RolloutPolicy, algo_factory
from robomimic.algo import algo_factory
from robomimic.config import config_factory
from robomimic.utils.log_utils import DataLogger, PrintLogger
......@@ -85,6 +84,7 @@ def train(config, device):
print(config)
print("")
log_dir, ckpt_dir, video_dir = TrainUtils.get_exp_dir(config)
print(f">>> Saving logs into directory: {log_dir}")
print(f">>> Saving checkpoints into directory: {ckpt_dir}")
print(f">>> Saving videos into directory: {video_dir}")
......@@ -194,8 +194,6 @@ def train(config, device):
# main training loop
best_valid_loss = None
best_return = {k: -np.inf for k in envs} if config.experiment.rollout.enabled else None
best_success_rate = {k: -1.0 for k in envs} if config.experiment.rollout.enabled else None
last_ckpt_time = time.time()
# number of learning steps per epoch (defaults to a full dataset pass)
......@@ -259,66 +257,6 @@ def train(config, device):
should_save_ckpt = True
ckpt_reason = "valid" if ckpt_reason is None else ckpt_reason
# Evaluate the model by by running rollouts
# do rollouts at fixed rate or if it's time to save a new ckpt
video_paths = None
rollout_check = (epoch % config.experiment.rollout.rate == 0) or (should_save_ckpt and ckpt_reason == "time")
if config.experiment.rollout.enabled and (epoch > config.experiment.rollout.warmstart) and rollout_check:
# wrap model as a RolloutPolicy to prepare for rollouts
rollout_model = RolloutPolicy(model, obs_normalization_stats=obs_normalization_stats)
num_episodes = config.experiment.rollout.n
all_rollout_logs, video_paths = TrainUtils.rollout_with_stats(
policy=rollout_model,
envs=envs,
horizon=config.experiment.rollout.horizon,
use_goals=config.use_goals,
num_episodes=num_episodes,
render=False,
video_dir=video_dir if config.experiment.render_video else None,
epoch=epoch,
video_skip=config.experiment.get("video_skip", 5),
terminate_on_success=config.experiment.rollout.terminate_on_success,
)
# summarize results from rollouts to tensorboard and terminal
for env_name in all_rollout_logs:
rollout_logs = all_rollout_logs[env_name]
for k, v in rollout_logs.items():
if k.startswith("Time_"):
data_logger.record(f"Timing_Stats/Rollout_{env_name}_{k[5:]}", v, epoch)
else:
data_logger.record(f"Rollout/{k}/{env_name}", v, epoch, log_stats=True)
print("\nEpoch {} Rollouts took {}s (avg) with results:".format(epoch, rollout_logs["time"]))
print(f"Env: {env_name}")
print(json.dumps(rollout_logs, sort_keys=True, indent=4))
# checkpoint and video saving logic
updated_stats = TrainUtils.should_save_from_rollout_logs(
all_rollout_logs=all_rollout_logs,
best_return=best_return,
best_success_rate=best_success_rate,
epoch_ckpt_name=epoch_ckpt_name,
save_on_best_rollout_return=config.experiment.save.on_best_rollout_return,
save_on_best_rollout_success_rate=config.experiment.save.on_best_rollout_success_rate,
)
best_return = updated_stats["best_return"]
best_success_rate = updated_stats["best_success_rate"]
epoch_ckpt_name = updated_stats["epoch_ckpt_name"]
should_save_ckpt = (
config.experiment.save.enabled and updated_stats["should_save_ckpt"]
) or should_save_ckpt
if updated_stats["ckpt_reason"] is not None:
ckpt_reason = updated_stats["ckpt_reason"]
# Only keep saved videos if the ckpt should be saved (but not because of validation score)
should_save_video = (should_save_ckpt and (ckpt_reason != "valid")) or config.experiment.keep_all_videos
if video_paths is not None and not should_save_video:
for env_name in video_paths:
os.remove(video_paths[env_name])
# Save model checkpoints based on conditions (success rate, validation loss, etc)
if should_save_ckpt:
TrainUtils.save_model(
......@@ -348,6 +286,8 @@ def main(args):
cfg_entry_point_key = f"robomimic_{args.algo}_cfg_entry_point"
print(f"Loading configuration for task: {args.task}")
print(gym.envs.registry.keys())
print(" ")
cfg_entry_point_file = gym.spec(args.task).kwargs.pop(cfg_entry_point_key)
# check if entry point exists
if cfg_entry_point_file is None:
......@@ -355,7 +295,7 @@ def main(args):
f"Could not find configuration for the environment: '{args.task}'."
f" Please check that the gym registry has the entry point: '{cfg_entry_point_key}'."
)
# load config from json file
with open(cfg_entry_point_file) as f:
ext_cfg = json.load(f)
config = config_factory(ext_cfg["algo_name"])
......@@ -373,7 +313,8 @@ def main(args):
config.experiment.name = args.name
# change location of experiment directory
config.train.output_dir = os.path.abspath(os.path.join("./logs/robomimic", args.task))
config.train.output_dir = os.path.abspath(os.path.join("./logs", args.log_dir, args.task))
# get torch device
device = TorchUtils.get_torch_device(try_to_use_cuda=config.train.cuda)
......@@ -409,6 +350,7 @@ if __name__ == "__main__":
parser.add_argument("--task", type=str, default=None, help="Name of the task.")
parser.add_argument("--algo", type=str, default=None, help="Name of the algorithm.")
parser.add_argument("--log_dir", type=str, default="robomimic", help="Path to log directory")
args = parser.parse_args()
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment