Commit 9ecd3abf authored by Kelly Guo, committed by Kelly Guo

Fixes distributed reporting for RSL RL benchmark (#498)

# Description

RSL RL reports data for a single GPU. For multi-GPU benchmarking, we
multiply the data by the number of GPUs to capture the total FPS.


## Type of change

<!-- As you go through the list, delete the ones that are not
applicable. -->

- Bug fix (non-breaking change which fixes an issue)


## Checklist

- [x] I have run the [`pre-commit` checks](https://pre-commit.com/) with
`./isaaclab.sh --format`
- [x] I have made corresponding changes to the documentation
- [x] My changes generate no new warnings
- [ ] I have added tests that prove my fix is effective or that my
feature works
- [ ] I have updated the changelog and the corresponding version in the
extension's `config/extension.toml` file
- [ ] I have added my name to the `CONTRIBUTORS.md` or my name already
exists there

<!--
As you go through the checklist above, you can mark something as done by
putting an x character in it

For example,
- [x] I have done this task
- [ ] I have not done this task
-->
parent 66665928
...@@ -390,6 +390,7 @@ while [[ $# -gt 0 ]]; do ...@@ -390,6 +390,7 @@ while [[ $# -gt 0 ]]; do
if ! command -v pre-commit &>/dev/null; then if ! command -v pre-commit &>/dev/null; then
echo "[INFO] Installing pre-commit..." echo "[INFO] Installing pre-commit..."
pip install pre-commit pip install pre-commit
sudo apt-get install -y pre-commit
fi fi
# always execute inside the Isaac Lab directory # always execute inside the Isaac Lab directory
echo "[INFO] Formatting the repository..." echo "[INFO] Formatting the repository..."
......
...@@ -143,6 +143,7 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen ...@@ -143,6 +143,7 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen
# multi-gpu training configuration # multi-gpu training configuration
world_rank = 0 world_rank = 0
world_size = 1
if args_cli.distributed: if args_cli.distributed:
env_cfg.sim.device = f"cuda:{app_launcher.local_rank}" env_cfg.sim.device = f"cuda:{app_launcher.local_rank}"
agent_cfg.device = f"cuda:{app_launcher.local_rank}" agent_cfg.device = f"cuda:{app_launcher.local_rank}"
...@@ -152,6 +153,7 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen ...@@ -152,6 +153,7 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen
env_cfg.seed = seed env_cfg.seed = seed
agent_cfg.seed = seed agent_cfg.seed = seed
world_rank = app_launcher.global_rank world_rank = app_launcher.global_rank
world_size = int(os.getenv("WORLD_SIZE", 1))
# specify directory for logging experiments # specify directory for logging experiments
log_root_path = os.path.join("logs", "rsl_rl", agent_cfg.experiment_name) log_root_path = os.path.join("logs", "rsl_rl", agent_cfg.experiment_name)
...@@ -221,13 +223,17 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen ...@@ -221,13 +223,17 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen
# prepare RL timing dict # prepare RL timing dict
collection_fps = ( collection_fps = (
1 / (np.array(log_data["Perf/collection time"])) * env.unwrapped.num_envs * agent_cfg.num_steps_per_env 1
/ (np.array(log_data["Perf/collection time"]))
* env.unwrapped.num_envs
* agent_cfg.num_steps_per_env
* world_size
) )
rl_training_times = { rl_training_times = {
"Collection Time": (np.array(log_data["Perf/collection time"]) / 1000).tolist(), "Collection Time": (np.array(log_data["Perf/collection time"]) / 1000).tolist(),
"Learning Time": (np.array(log_data["Perf/learning_time"]) / 1000).tolist(), "Learning Time": (np.array(log_data["Perf/learning_time"]) / 1000).tolist(),
"Collection FPS": collection_fps.tolist(), "Collection FPS": collection_fps.tolist(),
"Total FPS": log_data["Perf/total_fps"], "Total FPS": log_data["Perf/total_fps"] * world_size,
} }
# log additional metrics to benchmark services # log additional metrics to benchmark services
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment