Unverified Commit 809d090d authored by Kelly Guo, committed by GitHub

Adds throughput benchmarking scripts for different learning workflows (#759)

# Description
- Removes unneeded extensions from headless app file
- Limits CPU threads when running multi-GPU training to minimize thread
context switching
- Applies multi-GPU renderer setting to avoid unnecessary GPU context
initialization
- Decreases 8k SH startup time on 8 GPUs from > 5 min to 1 min
- Adds benchmarking scripts

## Type of change

<!-- As you go through the list, delete the ones that are not
applicable. -->

- Bug fix (non-breaking change which fixes an issue)

## Checklist

- [x] I have run the [`pre-commit` checks](https://pre-commit.com/) with
`./isaaclab.sh --format`
- [ ] I have made corresponding changes to the documentation
- [x] My changes generate no new warnings
- [ ] I have added tests that prove my fix is effective or that my
feature works
- [x] I have updated the changelog and the corresponding version in the
extension's `config/extension.toml` file
- [ ] I have added my name to the `CONTRIBUTORS.md` or my name already
exists there

<!--
As you go through the checklist above, you can mark something as done by
putting an x character in it

For example,
- [x] I have done this task
- [ ] I have not done this task
-->

---------
Co-authored-by: Alexander <143108850+nv-apoddubny@users.noreply.github.com>
Co-authored-by: David Hoeller <dhoeller@nvidia.com>
parent e90400b7
......@@ -20,14 +20,11 @@ app.version = "4.1.0"
# Omniverse related dependencies #
##################################
[dependencies]
"omni.kit.window.title" = {}
"omni.physx" = {}
"omni.physx.tensors" = {}
"omni.physx.fabric" = {}
"omni.warp.core" = {}
"usdrt.scenegraph" = {}
"omni.kit.primitive.mesh" = {}
"omni.kit.mainwindow" = {}
"omni.kit.telemetry" = {}
......
......@@ -55,6 +55,9 @@ rtx-transient.dlssg.enabled = false
rtx.sceneDb.ambientLightIntensity = 1.0
rtx.directLighting.sampledLighting.enabled = true
# Avoids unnecessary GPU context initialization
renderer.multiGpu.maxGpuCount=1
# Force synchronous rendering to improve training results
omni.replicator.asyncRendering = false
......
......@@ -182,6 +182,9 @@ rtx.newDenoiser.enabled = true
# Enable Iray and pxr by setting this to "rtx,iray,pxr"
renderer.enabled = "rtx"
# Avoids unnecessary GPU context initialization
renderer.multiGpu.maxGpuCount=1
### async rendering settings
omni.replicator.asyncRendering = false
app.asyncRendering = false
......
......@@ -55,6 +55,9 @@ rtx-transient.dlssg.enabled = false
rtx.sceneDb.ambientLightIntensity = 1.0
rtx.directLighting.sampledLighting.enabled = true
# Avoids unnecessary GPU context initialization
renderer.multiGpu.maxGpuCount=1
# Force synchronous rendering to improve training results
omni.replicator.asyncRendering = false
......
......@@ -464,6 +464,16 @@ class AppLauncher:
if distributed_train:
self.device_id = self.local_rank
launcher_args["multi_gpu"] = False
# limit CPU threads to minimize thread context switching
# this ensures processes do not take up all available threads and fight for resources
num_cpu_cores = os.cpu_count()
num_threads_per_process = num_cpu_cores // int(os.getenv("WORLD_SIZE", 1))
# set environment variables to limit CPU threads
os.environ["PXR_WORK_THREAD_LIMIT"] = str(num_threads_per_process)
os.environ["OPENBLAS_NUM_THREADS"] = str(num_threads_per_process)
# pass command line variable to kit
sys.argv.append(f"--/plugins/carb.tasking.plugin/threadCount={num_threads_per_process}")
# set physics and rendering device
launcher_args["physics_gpu"] = self.device_id
launcher_args["active_gpu"] = self.device_id
......
......@@ -108,7 +108,7 @@ class DirectRLEnv(gym.Env):
carb.log_warn(msg)
# generate scene
with Timer("[INFO]: Time taken for scene creation"):
with Timer("[INFO]: Time taken for scene creation", "scene_creation"):
self.scene = InteractiveScene(self.cfg.scene)
self._setup_scene()
print("[INFO]: Scene manager: ", self.scene)
......@@ -127,7 +127,7 @@ class DirectRLEnv(gym.Env):
# note: when started in extension mode, first call sim.reset_async() and then initialize the managers
if builtins.ISAAC_LAUNCHED_FROM_TERMINAL is False:
print("[INFO]: Starting the simulation. This may take a few seconds. Please wait...")
with Timer("[INFO]: Time taken for simulation start"):
with Timer("[INFO]: Time taken for simulation start", "simulation_start"):
self.sim.reset()
# -- event manager used for randomization
......
......@@ -107,7 +107,7 @@ class ManagerBasedEnv:
self._sim_step_counter = 0
# generate scene
with Timer("[INFO]: Time taken for scene creation"):
with Timer("[INFO]: Time taken for scene creation", "scene_creation"):
self.scene = InteractiveScene(self.cfg.scene)
print("[INFO]: Scene manager: ", self.scene)
......@@ -125,7 +125,7 @@ class ManagerBasedEnv:
# note: when started in extension mode, first call sim.reset_async() and then initialize the managers
if builtins.ISAAC_LAUNCHED_FROM_TERMINAL is False:
print("[INFO]: Starting the simulation. This may take a few seconds. Please wait...")
with Timer("[INFO]: Time taken for simulation start"):
with Timer("[INFO]: Time taken for simulation start", "simulation_start"):
self.sim.reset()
# add timeline event to load managers
self.load_managers()
......
......@@ -9,7 +9,7 @@ from __future__ import annotations
import time
from contextlib import ContextDecorator
from typing import Any
from typing import Any, ClassVar
class TimerError(Exception):
......@@ -60,14 +60,25 @@ class Timer(ContextDecorator):
Reference: https://gist.github.com/sumeet/1123871
"""
def __init__(self, msg: str | None = None):
timing_info: ClassVar[dict[str, float]] = dict()
"""Dictionary for storing the elapsed time per timer instances globally.
This dictionary logs the timer information. The keys are the names given to the timer class
at its initialization. If no :attr:`name` is passed to the constructor, no time
is recorded in the dictionary.
"""
def __init__(self, msg: str | None = None, name: str | None = None):
"""Initializes the timer.
Args:
msg: The message to display when using the timer
class in a context manager. Defaults to None.
name: The name to use for logging times in a global
dictionary. Defaults to None.
"""
self._msg = msg
self._name = name
self._start_time = None
self._stop_time = None
self._elapsed_time = None
......@@ -118,6 +129,9 @@ class Timer(ContextDecorator):
self._elapsed_time = self._stop_time - self._start_time
self._start_time = None
if self._name:
Timer.timing_info[self._name] = self._elapsed_time
"""
Context managers
"""
......@@ -133,3 +147,25 @@ class Timer(ContextDecorator):
# print message
if self._msg is not None:
print(self._msg, f": {self._elapsed_time:0.6f} seconds")
"""
Static Methods
"""
@staticmethod
def get_timer_info(name: str) -> float:
    """Retrieves the elapsed time logged in the global dictionary under a given name.

    Args:
        name: Name of the entry to be retrieved.

    Raises:
        TimerError: If name doesn't exist in the log.

    Returns:
        The time logged under the given name.
    """
    # index directly instead of a membership test followed by ``dict.get``: a single
    # lookup, and the result is never ``None``, which matches the ``float`` annotation
    try:
        return Timer.timing_info[name]
    except KeyError:
        # suppress the KeyError context so callers only see the TimerError, as before
        raise TimerError(f"Timer {name} does not exist") from None
......@@ -65,12 +65,14 @@ simulation_app = app_launcher.app
import numpy as np
import omni.isaac.core.utils.prims as prim_utils
import omni.kit
import omni.kit.commands
from omni.isaac.cloner import GridCloner
from omni.isaac.core.materials import PhysicsMaterial, PreviewSurface
from omni.isaac.core.objects import DynamicSphere
from omni.isaac.core.prims import GeometryPrim, RigidPrim, RigidPrimView
from omni.isaac.core.simulation_context import SimulationContext
from omni.isaac.core.utils.extensions import enable_extension
from omni.isaac.core.utils.viewports import set_camera_view
import omni.isaac.lab.sim as sim_utils
......@@ -79,6 +81,8 @@ from omni.isaac.lab.terrains.config.rough import ROUGH_TERRAINS_CFG
from omni.isaac.lab.terrains.terrain_importer import TerrainImporter
from omni.isaac.lab.utils.assets import ISAAC_NUCLEUS_DIR
enable_extension("omni.kit.primitive.mesh")
def main():
"""Generates a terrain from isaaclab."""
......
......@@ -17,11 +17,13 @@ import torch
import unittest
import omni.isaac.core.utils.prims as prim_utils
import omni.kit
import omni.kit.commands
from omni.isaac.cloner import GridCloner
from omni.isaac.core.materials import PhysicsMaterial, PreviewSurface
from omni.isaac.core.objects import DynamicSphere
from omni.isaac.core.prims import GeometryPrim, RigidPrim, RigidPrimView
from omni.isaac.core.utils.extensions import enable_extension
import omni.isaac.lab.terrains as terrain_gen
from omni.isaac.lab.sim import SimulationContext, build_simulation_context
......@@ -260,6 +262,7 @@ class TestTerrainImporter(unittest.TestCase):
)
else:
# -- Ball geometry
enable_extension("omni.kit.primitive.mesh")
cube_prim_path = omni.kit.commands.execute("CreateMeshPrimCommand", prim_type="Sphere")[1]
prim_utils.move_prim(cube_prim_path, "/World/envs/env_0/ball")
# -- Ball physics
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment