Fixes benchmarking scripts (#99)

# Description

Fixes benchmarking scripts to handle new config parsing and distributed settings

## Type of change

- Bug fix (non-breaking change which fixes an issue)

## Checklist

- [x] I have run the [`pre-commit` checks](https://pre-commit.com/) with `./isaaclab.sh --format`
- [x] I have made corresponding changes to the documentation
- [x] My changes generate no new warnings
- [ ] I have added tests that prove my fix is effective or that my feature works
- [ ] I have updated the changelog and the corresponding version in the extension's `config/extension.toml` file
- [ ] I have added my name to the `CONTRIBUTORS.md` or my name already exists there

Fixes benchmarking scripts (#99)
# Description

Fixes benchmarking scripts to handle new config parsing and distributed settings

## Type of change

- Bug fix (non-breaking change which fixes an issue)

## Checklist

- [x] I have run the [`pre-commit` checks](https://pre-commit.com/) with `./isaaclab.sh --format`
- [x] I have made corresponding changes to the documentation
- [x] My changes generate no new warnings
- [ ] I have added tests that prove my fix is effective or that my feature works
- [ ] I have updated the changelog and the corresponding version in the extension's `config/extension.toml` file
- [ ] I have added my name to the `CONTRIBUTORS.md` or my name already exists there
e5214b89 · Kelly Guo · David Hoeller · d02877d7 · e5214b89 · e5214b89
Commit e5214b89 authored Aug 21, 2024 by Kelly Guo Committed by David Hoeller Sep 20, 2024
4 changed files
--- a/source/standalone/workflows/benchmarks/benchmark_non_rl.py
+++ b/source/standalone/workflows/benchmarks/benchmark_non_rl.py
@@ -17,7 +17,6 @@ parser = argparse.ArgumentParser(description="Train an RL agent with RL-Games.")
 parser.add_argument("--video", action="store_true", default=False, help="Record videos during training.")
 parser.add_argument("--video_length", type=int, default=200, help="Length of the recorded video (in steps).")
 parser.add_argument("--video_interval", type=int, default=2000, help="Interval between video recordings (in steps).")
-parser.add_argument("--cpu", action="store_true", default=False, help="Use CPU pipeline.")
 parser.add_argument(
    "--disable_fabric", action="store_true", default=False, help="Disable fabric and use USD I/O operations."
 )
@@ -61,7 +60,7 @@ enable_extension("omni.isaac.benchmark.services")
 from omni.isaac.benchmark.services import BaseIsaacBenchmark
 from omni.isaac.lab.utils.timer import Timer
-from source.standalone.workflows.benchmarks.utils import (
+from source.standalone.benchmarks.utils import (
    log_app_start_time,
    log_python_imports_time,
    log_runtime_step_times,
@@ -107,9 +106,17 @@ def main():
    # parse configuration
    env_cfg = parse_env_cfg(
-        args_cli.task, use_gpu=not args_cli.cpu, num_envs=args_cli.num_envs, use_fabric=not args_cli.disable_fabric
+        args_cli.task, device=args_cli.device, num_envs=args_cli.num_envs, use_fabric=not args_cli.disable_fabric
    )
+    # process distributed
+    world_size = 1
+    world_rank = 0
+    if args_cli.distributed:
+        env_cfg.sim.device = f"cuda:{app_launcher.local_rank}"
+        world_size = int(os.getenv("WORLD_SIZE", 1))
+        world_rank = app_launcher.global_rank
    task_startup_time_begin = time.perf_counter_ns()
    # create isaac environment
@@ -158,27 +165,28 @@ def main():
        # terminate
        break
-    benchmark.store_measurements()
+    if world_rank == 0:
+        benchmark.store_measurements()
-    # compute stats
-    step_times = np.array(step_times) / 1e6  # ns to ms
+        # compute stats
-    fps = 1.0 / (step_times / 1000)
+        step_times = np.array(step_times) / 1e6  # ns to ms
-    effective_fps = fps * env.unwrapped.num_envs
+        fps = 1.0 / (step_times / 1000)
+        effective_fps = fps * env.unwrapped.num_envs * world_size
-    # prepare step timing dict
-    environment_step_times = {
+        # prepare step timing dict
-        "Environment step times": step_times.tolist(),
+        environment_step_times = {
-        "Environment step FPS": fps.tolist(),
+            "Environment step times": step_times.tolist(),
-        "Environment step effective FPS": effective_fps.tolist(),
+            "Environment step FPS": fps.tolist(),
-    }
+            "Environment step effective FPS": effective_fps.tolist(),
+        }
-    log_app_start_time(benchmark, (app_start_time_end - app_start_time_begin) / 1e6)
-    log_python_imports_time(benchmark, (imports_time_end - imports_time_begin) / 1e6)
+        log_app_start_time(benchmark, (app_start_time_end - app_start_time_begin) / 1e6)
-    log_task_start_time(benchmark, (task_startup_time_end - task_startup_time_begin) / 1e6)
+        log_python_imports_time(benchmark, (imports_time_end - imports_time_begin) / 1e6)
-    log_scene_creation_time(benchmark, Timer.get_timer_info("scene_creation") * 1000)
+        log_task_start_time(benchmark, (task_startup_time_end - task_startup_time_begin) / 1e6)
-    log_simulation_start_time(benchmark, Timer.get_timer_info("simulation_start") * 1000)
+        log_scene_creation_time(benchmark, Timer.get_timer_info("scene_creation") * 1000)
-    log_total_start_time(benchmark, (task_startup_time_end - app_start_time_begin) / 1e6)
+        log_simulation_start_time(benchmark, Timer.get_timer_info("simulation_start") * 1000)
-    log_runtime_step_times(benchmark, environment_step_times, compute_stats=True)
+        log_total_start_time(benchmark, (task_startup_time_end - app_start_time_begin) / 1e6)
+        log_runtime_step_times(benchmark, environment_step_times, compute_stats=True)
    benchmark.stop()

--- a/source/standalone/workflows/benchmarks/benchmark_rlgames.py
+++ b/source/standalone/workflows/benchmarks/benchmark_rlgames.py
@@ -17,7 +17,6 @@ parser = argparse.ArgumentParser(description="Train an RL agent with RL-Games.")
 parser.add_argument("--video", action="store_true", default=False, help="Record videos during training.")
 parser.add_argument("--video_length", type=int, default=200, help="Length of the recorded video (in steps).")
 parser.add_argument("--video_interval", type=int, default=2000, help="Interval between video recordings (in steps).")
-parser.add_argument("--cpu", action="store_true", default=False, help="Use CPU pipeline.")
 parser.add_argument(
    "--disable_fabric", action="store_true", default=False, help="Disable fabric and use USD I/O operations."
 )
@@ -82,7 +81,7 @@ from omni.isaac.lab_tasks.utils.wrappers.rl_games import RlGamesGpuEnv, RlGamesV
 imports_time_end = time.perf_counter_ns()
 from omni.isaac.lab.utils.timer import Timer
-from source.standalone.workflows.benchmarks.utils import (
+from source.standalone.benchmarks.utils import (
    log_app_start_time,
    log_python_imports_time,
    log_rl_policy_episode_lengths,
@@ -123,10 +122,18 @@ def main():
    # parse configuration
    env_cfg = parse_env_cfg(
-        args_cli.task, use_gpu=not args_cli.cpu, num_envs=args_cli.num_envs, use_fabric=not args_cli.disable_fabric
+        args_cli.task, device=args_cli.device, num_envs=args_cli.num_envs, use_fabric=not args_cli.disable_fabric
    )
    agent_cfg = load_cfg_from_registry(args_cli.task, "rl_games_cfg_entry_point")
+    # process distributed
+    world_rank = 0
+    if args_cli.distributed:
+        env_cfg.sim.device = f"cuda:{app_launcher.local_rank}"
+        agent_cfg.device = f"cuda:{app_launcher.local_rank}"
+        world_rank = app_launcher.global_rank
    # override from command line
    if args_cli_seed is not None:
        agent_cfg["params"]["seed"] = args_cli_seed
@@ -210,32 +217,33 @@ def main():
    # train the agent
    runner.run({"train": True, "play": False, "sigma": None})
-    benchmark.store_measurements()
+    if world_rank == 0:
+        benchmark.store_measurements()
-    # parse tensorboard file stats
-    tensorboard_log_dir = os.path.join(log_root_path, log_dir, "summaries")
+        # parse tensorboard file stats
-    log_data = parse_tf_logs(tensorboard_log_dir)
+        tensorboard_log_dir = os.path.join(log_root_path, log_dir, "summaries")
+        log_data = parse_tf_logs(tensorboard_log_dir)
-    # prepare RL timing dict
-    rl_training_times = {
+        # prepare RL timing dict
-        "Environment only step time": log_data["performance/step_time"],
+        rl_training_times = {
-        "Environment + Inference step time": log_data["performance/step_inference_time"],
+            "Environment only step time": log_data["performance/step_time"],
-        "Environment + Inference + Policy update time": log_data["performance/rl_update_time"],
+            "Environment + Inference step time": log_data["performance/step_inference_time"],
-        "Environment only FPS": log_data["performance/step_fps"],
+            "Environment + Inference + Policy update time": log_data["performance/rl_update_time"],
-        "Environment + Inference FPS": log_data["performance/step_inference_fps"],
+            "Environment only FPS": log_data["performance/step_fps"],
-        "Environment + Inference + Policy update FPS": log_data["performance/step_inference_rl_update_fps"],
+            "Environment + Inference FPS": log_data["performance/step_inference_fps"],
-    }
+            "Environment + Inference + Policy update FPS": log_data["performance/step_inference_rl_update_fps"],
+        }
-    # log additional metrics to benchmark services
-    log_app_start_time(benchmark, (app_start_time_end - app_start_time_begin) / 1e6)
+        # log additional metrics to benchmark services
-    log_python_imports_time(benchmark, (imports_time_end - imports_time_begin) / 1e6)
+        log_app_start_time(benchmark, (app_start_time_end - app_start_time_begin) / 1e6)
-    log_task_start_time(benchmark, (task_startup_time_end - task_startup_time_begin) / 1e6)
+        log_python_imports_time(benchmark, (imports_time_end - imports_time_begin) / 1e6)
-    log_scene_creation_time(benchmark, Timer.get_timer_info("scene_creation") * 1000)
+        log_task_start_time(benchmark, (task_startup_time_end - task_startup_time_begin) / 1e6)
-    log_simulation_start_time(benchmark, Timer.get_timer_info("simulation_start") * 1000)
+        log_scene_creation_time(benchmark, Timer.get_timer_info("scene_creation") * 1000)
-    log_total_start_time(benchmark, (task_startup_time_end - app_start_time_begin) / 1e6)
+        log_simulation_start_time(benchmark, Timer.get_timer_info("simulation_start") * 1000)
-    log_runtime_step_times(benchmark, rl_training_times, compute_stats=True)
+        log_total_start_time(benchmark, (task_startup_time_end - app_start_time_begin) / 1e6)
-    log_rl_policy_rewards(benchmark, log_data["rewards/iter"])
+        log_runtime_step_times(benchmark, rl_training_times, compute_stats=True)
-    log_rl_policy_episode_lengths(benchmark, log_data["episode_lengths/iter"])
+        log_rl_policy_rewards(benchmark, log_data["rewards/iter"])
+        log_rl_policy_episode_lengths(benchmark, log_data["episode_lengths/iter"])
    benchmark.stop()

--- a/source/standalone/workflows/benchmarks/benchmark_rsl_rl.py
+++ b/source/standalone/workflows/benchmarks/benchmark_rsl_rl.py
@@ -24,7 +24,6 @@ parser = argparse.ArgumentParser(description="Train an RL agent with RSL-RL.")
 parser.add_argument("--video", action="store_true", default=False, help="Record videos during training.")
 parser.add_argument("--video_length", type=int, default=200, help="Length of the recorded video (in steps).")
 parser.add_argument("--video_interval", type=int, default=2000, help="Interval between video recordings (in steps).")
-parser.add_argument("--cpu", action="store_true", default=False, help="Use CPU pipeline.")
 parser.add_argument(
    "--disable_fabric", action="store_true", default=False, help="Disable fabric and use USD I/O operations."
 )
@@ -58,6 +57,7 @@ app_start_time_end = time.perf_counter_ns()
 imports_time_begin = time.perf_counter_ns()
 import gymnasium as gym
+import numpy as np
 import os
 import torch
 from datetime import datetime
@@ -80,7 +80,7 @@ enable_extension("omni.isaac.benchmark.services")
 from omni.isaac.benchmark.services import BaseIsaacBenchmark
 from omni.isaac.lab.utils.timer import Timer
-from source.standalone.workflows.benchmarks.utils import (
+from source.standalone.benchmarks.utils import (
    log_app_start_time,
    log_python_imports_time,
    log_rl_policy_episode_lengths,
@@ -118,7 +118,7 @@ def main():
    # parse configuration
    benchmark.set_phase("loading", start_recording_frametime=False, start_recording_runtime=True)
    env_cfg: ManagerBasedRLEnvCfg = parse_env_cfg(
-        args_cli.task, use_gpu=not args_cli.cpu, num_envs=args_cli.num_envs, use_fabric=not args_cli.disable_fabric
+        args_cli.task, device=args_cli.device, num_envs=args_cli.num_envs, use_fabric=not args_cli.disable_fabric
    )
    agent_cfg: RslRlOnPolicyRunnerCfg = cli_args.parse_rsl_rl_cfg(args_cli.task, args_cli)
@@ -188,9 +188,13 @@ def main():
    log_data = parse_tf_logs(log_dir)
    # prepare RL timing dict
+    collection_fps = (
+        1 / (np.array(log_data["Perf/collection time"])) * env.unwrapped.num_envs * agent_cfg.num_steps_per_env
+    )
    rl_training_times = {
-        "Collection Time": log_data["Perf/collection time"],
+        "Collection Time": (np.array(log_data["Perf/collection time"]) / 1000).tolist(),
-        "Learning Time": log_data["Perf/learning_time"],
+        "Learning Time": (np.array(log_data["Perf/learning_time"]) / 1000).tolist(),
+        "Collection FPS": collection_fps.tolist(),
        "Total FPS": log_data["Perf/total_fps"],
    }

--- a/source/standalone/workflows/benchmarks/utils.py
+++ b/source/standalone/workflows/benchmarks/utils.py