Reorganizes configuration files for locomotion velocity environment (#220)

# Description This MR does the following: * Changes the way environment and agent configs are handled. These are now moved to their respective `env` folder. * Replaces YAML file with configclasses for rsl_rl library * Adds examples of other robots for locomotion-velocity task: anymal-b, unitree-a1, and anymal-d (placeholder) The new environment folder structure is as follows: ``` my_task (folder) |- mdp (folder where all user's task-specific terms are stored) |- config |- robot_name_1 (folder) |- robot_specific_env_cfg.py | agents (folder) |- rsl_rl_cfg.py |- sb3_cfg.py |- .... |- __init__.py (register robot-specific tasks) |- robot_name_2 (folder) |- my_task_env_cfg.py (base env config) |- my_task_env.py (base env -- optional -- should normally use RLEnv) ``` Fixes #142 ## Type of change - New feature (non-breaking change which adds functionality) - Breaking change (fix or feature that would cause existing functionality to not work as expected) ## Checklist - [x] I have run the [`pre-commit` checks](https://pre-commit.com/) with `./orbit.sh --format` - [ ] I have made corresponding changes to the documentation - [x] My changes generate no new warnings - [ ] I have added tests that prove my fix is effective or that my feature works - [x] I have updated the changelog and the corresponding version in the extension's `config/extension.toml` file --------- Co-authored-by: Nikita Rudin <nrudin@nvidia.com>

Reorganizes configuration files for locomotion velocity environment (#220)
# Description This MR does the following: * Changes the way environment and agent configs are handled. These are now moved to their respective `env` folder. * Replaces YAML file with configclasses for rsl_rl library * Adds examples of other robots for locomotion-velocity task: anymal-b, unitree-a1, and anymal-d (placeholder) The new environment folder structure is as follows: ``` my_task (folder) |- mdp (folder where all user's task-specific terms are stored) |- config |- robot_name_1 (folder) |- robot_specific_env_cfg.py | agents (folder) |- rsl_rl_cfg.py |- sb3_cfg.py |- .... |- __init__.py (register robot-specific tasks) |- robot_name_2 (folder) |- my_task_env_cfg.py (base env config) |- my_task_env.py (base env -- optional -- should normally use RLEnv) ``` Fixes #142 ## Type of change - New feature (non-breaking change which adds functionality) - Breaking change (fix or feature that would cause existing functionality to not work as expected) ## Checklist - [x] I have run the [`pre-commit` checks](https://pre-commit.com/) with `./orbit.sh --format` - [ ] I have made corresponding changes to the documentation - [x] My changes generate no new warnings - [ ] I have added tests that prove my fix is effective or that my feature works - [x] I have updated the changelog and the corresponding version in the extension's `config/extension.toml` file --------- Co-authored-by: Nikita Rudin <nrudin@nvidia.com>
e7d43de3 · Mayank Mittal · GitHub · a9b5df11 · e7d43de3 · e7d43de3
Unverified Commit e7d43de3 authored Nov 01, 2023 by Mayank Mittal Committed by GitHub Nov 01, 2023
93 changed files
--- a/source/extensions/omni.isaac.orbit/config/extension.toml
+++ b/source/extensions/omni.isaac.orbit/config/extension.toml
 [package]
 # Note: Semantic Versioning is used: https://semver.org/
-version = "0.9.29"
+version = "0.9.30"
 # Description
 title = "ORBIT framework for Robot Learning"

--- a/source/extensions/omni.isaac.orbit/docs/CHANGELOG.rst
+++ b/source/extensions/omni.isaac.orbit/docs/CHANGELOG.rst
 Changelog
 ---------
+0.9.30 (2023-11-01)
+~~~~~~~~~~~~~~~~~~~
+Fixed
+^^^^^
+* Added skipping of None values in the :class:`InteractiveScene` class when creating the scene from configuration
+  objects. Earlier, it was throwing an error when the user passed a None value for a scene element.
+* Added ``kwargs`` to the :class:`RLEnv` class to allow passing additional arguments from gym registry function.
+  This is now needed since the registry function passes args beyond the ones specified in the constructor.
 0.9.29 (2023-11-01)
 ~~~~~~~~~~~~~~~~~~~

--- a/source/extensions/omni.isaac.orbit/omni/isaac/orbit/assets/config/unitree.py
+++ b/source/extensions/omni.isaac.orbit/omni/isaac/orbit/assets/config/unitree.py
@@ -28,7 +28,7 @@ from ..articulation import ArticulationCfg
 UNITREE_A1_CFG = ArticulationCfg(
    spawn=sim_utils.UsdFileCfg(
        usd_path=f"{ISAAC_ORBIT_NUCLEUS_DIR}/Robots/Unitree/A1/a1_instanceable.usd",
-        activate_contact_sensors=False,
+        activate_contact_sensors=True,
        rigid_props=sim_utils.RigidBodyPropertiesCfg(
            disable_gravity=False,
            retain_accelerations=False,
@@ -49,10 +49,11 @@ UNITREE_A1_CFG = ArticulationCfg(
            ".*R_hip_joint": -0.1,
            "F[L,R]_thigh_joint": 0.8,
            "R[L,R]_thigh_joint": 1.0,
-            ".*_calf_joint": -1.8,
+            ".*_calf_joint": -1.5,
        },
        joint_vel={".*": 0.0},
    ),
+    soft_joint_pos_limit_factor=0.9,
    actuators={
        "base_legs": DCMotorCfg(
            joint_names_expr=[".*"],

--- a/source/extensions/omni.isaac.orbit/omni/isaac/orbit/envs/rl_env.py
+++ b/source/extensions/omni.isaac.orbit/omni/isaac/orbit/envs/rl_env.py
@@ -78,7 +78,7 @@ class RLEnv(BaseEnv, gym.Env):
    cfg: RLEnvCfg
    """Configuration for the environment."""
-    def __init__(self, cfg: RLEnvCfg):
+    def __init__(self, cfg: RLEnvCfg, **kwargs):
        # initialize the base class to setup the scene.
        super().__init__(cfg=cfg)

--- a/source/extensions/omni.isaac.orbit/omni/isaac/orbit/scene/interactive_scene.py
+++ b/source/extensions/omni.isaac.orbit/omni/isaac/orbit/scene/interactive_scene.py
@@ -327,7 +327,7 @@ class InteractiveScene:
        for asset_name, asset_cfg in self.cfg.__dict__.items():
            # skip keywords
            # note: easier than writing a list of keywords: [num_envs, env_spacing, lazy_sensor_update]
-            if asset_name in InteractiveSceneCfg.__dataclass_fields__:
+            if asset_name in InteractiveSceneCfg.__dataclass_fields__ or asset_cfg is None:
                continue
            # resolve regex
            asset_cfg.prim_path = asset_cfg.prim_path.format(ENV_REGEX_NS=self.env_regex_ns)

--- a/source/extensions/omni.isaac.orbit_envs/config/extension.toml
+++ b/source/extensions/omni.isaac.orbit_envs/config/extension.toml
 [package]
 # Note: Semantic Versioning is used: https://semver.org/
-version = "0.4.3"
+version = "0.5.0"
 # Description
 title = "ORBIT Environments"

--- a/source/extensions/omni.isaac.orbit_envs/data/rl_games/cartpole_sac.yaml
+++ b/source/extensions/omni.isaac.orbit_envs/data/rl_games/cartpole_sac.yaml
-params:
-  seed: 12344
-  # environment wrapper clipping
-  env:
-    clip_observations: 5.0
-    clip_actions: 1.0
-  algo:
-    name: sac
-  model:
-    name: soft_actor_critic
-  network:
-    name: soft_actor_critic
-    separate: True
-    space:
-      continuous:
-        mu_activation: None
-        sigma_activation: None
-        mu_init:
-          name: default
-        sigma_init:
-          name: const_initializer
-          val: 0
-        fixed_sigma: True
-    mlp:
-      units: [32, 32]
-      activation: elu
-      d2rl: False
-      initializer:
-        name: default
-      regularizer:
-        name: None
-    log_std_bounds: [-5, 2]
-  load_checkpoint: False # flag which sets whether to load the checkpoint
-  load_path: '' # path to the checkpoint to load
-  config:
-    name: cartpole
-    env_name: rlgpu
-    device: 'cuda:0'
-    device_name: 'cuda:0'
-    multi_gpu: False
-    # ppo: True
-    mixed_precision: False
-    normalize_input: True
-    normalize_value: True
-    num_actors: -1
-    reward_shaper:
-      scale_value: 0.1
-    normalize_advantage: True
-    gamma: 0.99
-    tau: 0.95
-    learning_rate: 3e-4
-    lr_schedule: adaptive
-    kl_threshold: 0.008
-    score_to_win: 20000
-    max_epochs: 65
-    save_best_after: 50
-    save_frequency: 25
-    grad_norm: 1.0
-    entropy_coef: 0.0
-    truncate_grads: True
-    e_clip: 0.2
-    horizon_length: 16
-    batch_size: 8192
-    mini_epochs: 8
-    critic_coef: 4
-    clip_value: True
-    seq_len: 4
-    bounds_loss_coef: 0.0001
-    num_seed_steps: 5
-    num_steps_per_episode: 16
-    critic_tau: 0.005
-    init_alpha: 1
-    learnable_temperature: True
-    replay_buffer_size: 1000000
-    actor_lr: 0.0005
-    critic_lr: 0.0005
-    alpha_lr: 0.005
--- a/source/extensions/omni.isaac.orbit_envs/data/rsl_rl/anymal_ppo.yaml
+++ b/source/extensions/omni.isaac.orbit_envs/data/rsl_rl/anymal_ppo.yaml
-# seed for the experiment
-seed: 42
-# device for the rl-agent
-device: 'cuda:0'
-policy:
-  init_noise_std: 1.0
-  actor_hidden_dims: [512, 256, 128]
-  critic_hidden_dims: [512, 256, 128]
-  activation: "elu"  # can be elu, relu, selu, crelu, lrelu, tanh, sigmoid
-  # only required when "policy_class_name" is'ActorCriticRecurrent':
-  # rnn_type: 'lstm'
-  # rnn_hidden_size: 512
-  # rnn_num_layers: 1
-algorithm:
-  # training params
-  value_loss_coef: 1.0
-  use_clipped_value_loss: True
-  clip_param: 0.2
-  entropy_coef: 0.005
-  num_learning_epochs: 5
-  num_mini_batches: 4  # mini batch size: num_envs * nsteps / nminibatches
-  learning_rate: 1.0e-3  # 5.e-4
-  schedule: "adaptive"  # adaptive, fixed
-  gamma: 0.99
-  lam: 0.95
-  desired_kl: 0.01
-  max_grad_norm: 1.0
-runner:
-  policy_class_name: "ActorCritic"
-  algorithm_class_name: "PPO"
-  num_steps_per_env: 24  # per iteration
-  max_iterations: 1500  # number of policy updates
-  empirical_normalization: False
-  # logging
-  save_interval: 50  # check for potential saves every this many iterations
-  experiment_name: "anymal_c"
-  run_name: ""
-  # load and resume
-  resume: False
-  load_run: -1  # -1: last run
-  checkpoint: -1  # -1: last saved model
-  resume_path: None  # updated from load_run and chkpt
--- a/source/extensions/omni.isaac.orbit_envs/data/rsl_rl/cabinet_ppo.yaml
+++ b/source/extensions/omni.isaac.orbit_envs/data/rsl_rl/cabinet_ppo.yaml
-# seed for the experiment
-seed: 67854
-# device for the rl-agent
-device: 'cuda:0'
-policy:
-  init_noise_std: 1.0
-  actor_hidden_dims: [256, 128, 64]
-  critic_hidden_dims: [256, 128, 64]
-  activation: "elu"  # can be elu, relu, selu, crelu, lrelu, tanh, sigmoid
-  # only required when "policy_class_name" is'ActorCriticRecurrent':
-  # rnn_type: 'lstm'
-  # rnn_hidden_size: 512
-  # rnn_num_layers: 1
-algorithm:
-  # training params
-  value_loss_coef: 1.0
-  use_clipped_value_loss: True
-  # Seems to be the same as clip_range
-  clip_param: 0.2
-  # Seems to be the same as ent_coef
-  entropy_coef: 0.00
-  # ? seems to be the same as n_epochs
-  num_learning_epochs: 8
-  # seems to be the same as batch_size
-  num_mini_batches: 8  # mini batch size: num_envs * nsteps / nminibatches
-  # seems to be same as learning_ratena
-  learning_rate: 5.0e-4  # 5.e-4
-  schedule: "adaptive"  # adaptive, fixed
-  # same as gamma
-  gamma: 0.99
-  # seems to be the same as gae_lambda
-  lam: 0.95
-  # seems to be the same as target_kl
-  desired_kl: 0.008
-  # seems to be the same as max_grad_norm
-  max_grad_norm: 1.0
-runner:
-  policy_class_name: "ActorCritic"
-  algorithm_class_name: "PPO"
-  num_steps_per_env: 64  # per iteration
-  max_iterations: 200  # number of policy updates
-  # logging
-  save_interval: 50  # check for potential saves every this many iterations
-  experiment_name: "cabinet"
-  run_name: ""
-  # load and resume
-  resume: False
-  load_run: -1  # -1: last run
-  checkpoint: -1  # -1: last saved model
-  resume_path: None  # updated from load_run and chkpt
--- a/source/extensions/omni.isaac.orbit_envs/data/rsl_rl/cartpole_ppo.yaml
+++ b/source/extensions/omni.isaac.orbit_envs/data/rsl_rl/cartpole_ppo.yaml
-# seed for the experiment
-seed: 42
-# device for the rl-agent
-device: 'cuda:0'
-policy:
-  init_noise_std: 1.0
-  actor_hidden_dims: [256, 128, 64]
-  critic_hidden_dims: [256, 128, 64]
-  activation: "elu"  # can be elu, relu, selu, crelu, lrelu, tanh, sigmoid
-  # only required when "policy_class_name" is'ActorCriticRecurrent':
-  # rnn_type: 'lstm'
-  # rnn_hidden_size: 512
-  # rnn_num_layers: 1
-algorithm:
-  # training params
-  value_loss_coef: 1.0
-  use_clipped_value_loss: True
-  clip_param: 0.2
-  entropy_coef: 0.01
-  num_learning_epochs: 8
-  num_mini_batches: 4  # mini batch size: num_envs * nsteps / nminibatches
-  learning_rate: 1.0e-3  # 5.e-4
-  schedule: "adaptive"  # adaptive, fixed
-  gamma: 0.99
-  lam: 0.95
-  desired_kl: 0.01
-  max_grad_norm: 1.0
-runner:
-  policy_class_name: "ActorCritic"
-  algorithm_class_name: "PPO"
-  num_steps_per_env: 16  # per iteration
-  max_iterations: 500  # number of policy updates
-  # logging
-  save_interval: 50  # check for potential saves every this many iterations
-  experiment_name: "cartpole"
-  run_name: ""
-  # load and resume
-  resume: False
-  load_run: -1  # -1: last run
-  checkpoint: -1  # -1: last saved model
-  resume_path: None  # updated from load_run and chkpt
--- a/source/extensions/omni.isaac.orbit_envs/data/rsl_rl/lift_ppo.yaml
+++ b/source/extensions/omni.isaac.orbit_envs/data/rsl_rl/lift_ppo.yaml
-# seed for the experiment
-seed: 42
-# device for the rl-agent
-device: 'cuda:0'
-policy:
-  init_noise_std: 1.0
-  actor_hidden_dims: [256, 128, 64]
-  critic_hidden_dims: [256, 128, 64]
-  activation: "elu"  # can be elu, relu, selu, crelu, lrelu, tanh, sigmoid
-  # only required when "policy_class_name" is'ActorCriticRecurrent':
-  # rnn_type: 'lstm'
-  # rnn_hidden_size: 512
-  # rnn_num_layers: 1
-algorithm:
-  # training params
-  value_loss_coef: 1.0
-  use_clipped_value_loss: True
-  clip_param: 0.2
-  entropy_coef: 0.01
-  num_learning_epochs: 5
-  num_mini_batches: 4  # mini batch size: num_envs * nsteps / nminibatches
-  learning_rate: 1.0e-3  # 5.e-4
-  schedule: "adaptive"  # adaptive, fixed
-  gamma: 0.99
-  lam: 0.95
-  desired_kl: 0.01
-  max_grad_norm: 1.0
-runner:
-  policy_class_name: "ActorCritic"
-  algorithm_class_name: "PPO"
-  num_steps_per_env: 96  # per iteration
-  max_iterations: 700  # number of policy updates
-  # logging
-  save_interval: 50  # check for potential saves every this many iterations
-  experiment_name: "lift"
-  run_name: ""
-  # load and resume
-  resume: False
-  load_run: -1  # -1: last run
-  checkpoint: -1  # -1: last saved model
-  resume_path: None  # updated from load_run and chkpt
--- a/source/extensions/omni.isaac.orbit_envs/data/rsl_rl/reach_ppo.yaml
+++ b/source/extensions/omni.isaac.orbit_envs/data/rsl_rl/reach_ppo.yaml
-# seed for the experiment
-seed: 42
-# device for the rl-agent
-device: 'cuda:0'
-policy:
-  init_noise_std: 1.0
-  actor_hidden_dims: [256, 128, 64]
-  critic_hidden_dims: [256, 128, 64]
-  activation: "elu"  # can be elu, relu, selu, crelu, lrelu, tanh, sigmoid
-  # only required when "policy_class_name" is'ActorCriticRecurrent':
-  # rnn_type: 'lstm'
-  # rnn_hidden_size: 512
-  # rnn_num_layers: 1
-algorithm:
-  # training params
-  value_loss_coef: 1.0
-  use_clipped_value_loss: True
-  # Seems to be the same as clip_range
-  clip_param: 0.2
-  # Seems to be the same as ent_coef
-  entropy_coef: 0.01
-  # ? seems to be the same as n_epochs
-  num_learning_epochs: 8
-  # seems to be the same as batch_size
-  num_mini_batches: 64  # mini batch size: num_envs * nsteps / nminibatches
-  # seems to be same as learning_rate
-  learning_rate: 3.0e-4  # 5.e-4
-  schedule: "adaptive"  # adaptive, fixed
-  # same as gamma
-  gamma: 0.99
-  # seems to be the same as gae_lambda
-  lam: 0.95
-  # seems to be the same as target_kl
-  desired_kl: 0.01
-  # seems to be the same as max_grad_norm
-  max_grad_norm: 1.0
-runner:
-  policy_class_name: "ActorCritic"
-  algorithm_class_name: "PPO"
-  num_steps_per_env: 64  # per iteration
-  max_iterations: 250  # number of policy updates
-  # logging
-  save_interval: 50  # check for potential saves every this many iterations
-  experiment_name: "reach"
-  run_name: ""
-  # load and resume
-  resume: False
-  load_run: -1  # -1: last run
-  checkpoint: -1  # -1: last saved model
-  resume_path: None  # updated from load_run and chkpt
--- a/source/extensions/omni.isaac.orbit_envs/data/sb3/cartpole_sac.yaml
+++ b/source/extensions/omni.isaac.orbit_envs/data/sb3/cartpole_sac.yaml
-# Reference: https://github.com/DLR-RM/rl-baselines3-zoo/blob/master/hyperparams/ppo.yml#L32
-seed: 42
-# 512×500×16
-n_timesteps: 4096000
-policy: 'MlpPolicy'
-train_freq: 16
-batch_size: 8192
-gamma: 0.99
-gradient_steps: 8
-ent_coef: 0.0
-learning_rate: !!float 3e-4
-policy_kwargs: "dict(
-                  log_std_init=-2,
-                  activation_fn=nn.ELU,
-                  net_arch=[32, 32]
-                )"
-# Uses VecNormalize class to normalize obs
-normalize_input: True
-# Uses VecNormalize class to normalize rew
-normalize_value: True
-clip_obs: 5
--- a/source/extensions/omni.isaac.orbit_envs/docs/CHANGELOG.rst
+++ b/source/extensions/omni.isaac.orbit_envs/docs/CHANGELOG.rst
 Changelog
 ---------
+0.5.0 (2023-10-30)
+~~~~~~~~~~~~~~~~~~
+Changed
+^^^^^^^
+* Changed the way agent configs are handled for environments and learning agents. Switched from yaml to configclasses.
 0.4.3 (2023-09-25)
 ~~~~~~~~~~~~~~~~~~
@@ -12,6 +20,7 @@ Changed
 * Removed the type-hinting from docstrings to simplify maintenance of the documentation. All type-hints are
  now in the code itself.
 0.4.2 (2023-08-29)
 ~~~~~~~~~~~~~~~~~~

--- a/source/extensions/omni.isaac.orbit_envs/omni/isaac/orbit_envs/__init__.py
+++ b/source/extensions/omni.isaac.orbit_envs/omni/isaac/orbit_envs/__init__.py
@@ -29,12 +29,11 @@ Usage:
 from __future__ import annotations
+import importlib
 import os
+import pkgutil
 import toml
-# TODO: include classics, manipulation again when updated on newest version
-from . import locomotion  # noqa: F401
 # Conveniences to other module directories via relative paths
 ORBIT_ENVS_EXT_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), "../../../"))
 """Path to the extension source directory."""
@@ -47,3 +46,47 @@ ORBIT_ENVS_METADATA = toml.load(os.path.join(ORBIT_ENVS_EXT_DIR, "config", "exte
 # Configure the module-level variables
 __version__ = ORBIT_ENVS_METADATA["package"]["version"]
+##
+# Register Gym environments.
+##
+def _import_all(package_name: str, blacklist_pkgs: list[str] = None):
+    """Import all sub-packages in a package recursively.
+    It is easier to use this function to import all sub-packages in a package recursively
+    than to manually import each sub-package.
+    It replaces the need of the following code:
+    .. code-block:: python
+        import .locomotion.velocity
+        import .manipulation.reach
+        import .manipulation.lift
+    Args:
+        package_name: The package name.
+        blacklist_pkgs: The list of blacklisted packages to skip. Defaults to None,
+            which means no packages are blacklisted.
+    """
+    # Default blacklist
+    if blacklist_pkgs is None:
+        blacklist_pkgs = []
+    # Import the package
+    package = importlib.import_module(package_name)
+    # Import all Python files
+    for file_name, module_name, is_pkg in pkgutil.walk_packages(package.__path__, package.__name__ + "."):
+        # check blacklisted
+        if any([pkg_name in module_name for pkg_name in blacklist_pkgs]):
+            continue
+        if is_pkg:
+            importlib.import_module(module_name)
+            _import_all(module_name, blacklist_pkgs)
+# The blacklist is used to prevent importing configs from sub-packages
+_BLACKLIST_PKGS = ["locomotion.velocity.config.anymal_d", "classic", "manipulation", "utils"]
+# Import all configs in this package
+_import_all(__name__, _BLACKLIST_PKGS)
--- a/source/extensions/omni.isaac.orbit_envs/omni/isaac/orbit_envs/classic/ant/__init__.py
+++ b/source/extensions/omni.isaac.orbit_envs/omni/isaac/orbit_envs/classic/ant/__init__.py
@@ -7,6 +7,21 @@
 Ant locomotion environment (similar to OpenAI Gym Ant-v2).
 """
-from .ant_env import AntEnv
+import gym
-__all__ = ["AntEnv"]
+from . import agents
+##
+# Register Gym environments.
+##
+gym.register(
+    id="Isaac-Ant-v0",
+    entry_point="omni.isaac.orbit.envs.rl_env:RLEnv",
+    kwargs={
+        "env_cfg_entry_point": f"{__name__}:ant_env_cfg.yaml",
+        "rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_ppo_cfg.yaml",
+        "skrl_cfg_entry_point": f"{agents.__name__}:skrl_ppo_cfg.yaml",
+        "sb3_cfg_entry_point": f"{agents.__name__}:sb3_ppo_cfg.yaml",
+    },
+)
--- a/source/extensions/omni.isaac.orbit_envs/data/rl_games/ant_ppo.yaml
+++ b/source/extensions/omni.isaac.orbit_envs/data/rl_games/ant_ppo.yaml
--- a/source/extensions/omni.isaac.orbit_envs/data/sb3/ant_ppo.yaml
+++ b/source/extensions/omni.isaac.orbit_envs/data/sb3/ant_ppo.yaml
--- a/source/extensions/omni.isaac.orbit_envs/data/skrl/ant_ppo.yaml
+++ b/source/extensions/omni.isaac.orbit_envs/data/skrl/ant_ppo.yaml
--- a/source/extensions/omni.isaac.orbit_envs/omni/isaac/orbit_envs/classic/cartpole/__init__.py
+++ b/source/extensions/omni.isaac.orbit_envs/omni/isaac/orbit_envs/classic/cartpole/__init__.py
@@ -7,6 +7,22 @@
 Cartpole balancing environment.
 """
-from .cartpole_env import CartpoleEnv
+import gym
-__all__ = ["CartpoleEnv"]
+from . import agents
+##
+# Register Gym environments.
+##
+gym.register(
+    id="Isaac-Cartpole-v0",
+    entry_point="omni.isaac.orbit.envs.rl_env:RLEnv",
+    kwargs={
+        "env_cfg_entry_point": f"{__name__}:ant_env_cfg.yaml",
+        "rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_ppo_cfg.yaml",
+        "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:CARTPOLE_RSL_RL_PPO_CFG",
+        "skrl_cfg_entry_point": f"{agents.__name__}:skrl_ppo_cfg.yaml",
+        "sb3_cfg_entry_point": f"{agents.__name__}:sb3_ppo_cfg.yaml",
+    },
+)
--- a/source/extensions/omni.isaac.orbit_envs/data/rl_games/cartpole_ppo.yaml
+++ b/source/extensions/omni.isaac.orbit_envs/data/rl_games/cartpole_ppo.yaml
--- a/source/extensions/omni.isaac.orbit_envs/omni/isaac/orbit_envs/classic/cartpole/agents/rsl_rl_ppo_cfg.py
+++ b/source/extensions/omni.isaac.orbit_envs/omni/isaac/orbit_envs/classic/cartpole/agents/rsl_rl_ppo_cfg.py
+from omni.isaac.orbit_envs.utils.wrappers.rsl_rl import (
+    RslRlOnPolicyRunnerCfg,
+    RslRlPpoActorCriticCfg,
+    RslRlPpoAlgorithmCfg,
+)
+CARTPOLE_RSL_RL_PPO_CFG = RslRlOnPolicyRunnerCfg(
+    num_steps_per_env=16,
+    max_iterations=500,
+    save_interval=50,
+    experiment_name="cartpole",
+    run_name="",
+    resume=False,
+    load_run=-1,
+    load_checkpoint=-1,
+    empirical_normalization=False,
+    policy=RslRlPpoActorCriticCfg(
+        init_noise_std=1.0,
+        actor_hidden_dims=[256, 128, 64],
+        critic_hidden_dims=[256, 128, 64],
+        activation="elu",
+    ),
+    algorithm=RslRlPpoAlgorithmCfg(
+        value_loss_coef=1.0,
+        use_clipped_value_loss=True,
+        clip_param=0.2,
+        entropy_coef=0.01,
+        num_learning_epochs=8,
+        num_mini_batches=4,
+        learning_rate=1.0e-3,
+        schedule="adaptive",
+        gamma=0.99,
+        lam=0.95,
+        desired_kl=0.01,
+        max_grad_norm=1.0,
+    ),
+)
--- a/source/extensions/omni.isaac.orbit_envs/data/sb3/cartpole_ppo.yaml
+++ b/source/extensions/omni.isaac.orbit_envs/data/sb3/cartpole_ppo.yaml
--- a/source/extensions/omni.isaac.orbit_envs/data/skrl/cartpole_ppo.yaml
+++ b/source/extensions/omni.isaac.orbit_envs/data/skrl/cartpole_ppo.yaml
--- a/source/extensions/omni.isaac.orbit_envs/omni/isaac/orbit_envs/classic/humanoid/__init__.py
+++ b/source/extensions/omni.isaac.orbit_envs/omni/isaac/orbit_envs/classic/humanoid/__init__.py
@@ -7,6 +7,21 @@
 Humanoid locomotion environment (similar to OpenAI Gym Humanoid-v2).
 """
-from .humanoid_env import HumanoidEnv
+import gym
-__all__ = ["HumanoidEnv"]
+from . import agents
+##
+# Register Gym environments.
+##
+gym.register(
+    id="Isaac-Humanoid-v0",
+    entry_point="omni.isaac.orbit.envs.rl_env:RLEnv",
+    kwargs={
+        "env_cfg_entry_point": f"{__name__}:ant_env_cfg.yaml",
+        "rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_ppo_cfg.yaml",
+        "skrl_cfg_entry_point": f"{agents.__name__}:skrl_ppo_cfg.yaml",
+        "sb3_cfg_entry_point": f"{agents.__name__}:sb3_ppo_cfg.yaml",
+    },
+)
--- a/source/extensions/omni.isaac.orbit_envs/data/rl_games/humanoid_ppo.yaml
+++ b/source/extensions/omni.isaac.orbit_envs/data/rl_games/humanoid_ppo.yaml
--- a/source/extensions/omni.isaac.orbit_envs/data/sb3/humanoid_ppo.yaml
+++ b/source/extensions/omni.isaac.orbit_envs/data/sb3/humanoid_ppo.yaml
--- a/source/extensions/omni.isaac.orbit_envs/data/skrl/humanoid_ppo.yaml
+++ b/source/extensions/omni.isaac.orbit_envs/data/skrl/humanoid_ppo.yaml
--- a/source/extensions/omni.isaac.orbit_envs/omni/isaac/orbit_envs/locomotion/__init__.py
+++ b/source/extensions/omni.isaac.orbit_envs/omni/isaac/orbit_envs/locomotion/__init__.py
@@ -3,32 +3,6 @@
 #
 # SPDX-License-Identifier: BSD-3-Clause
-"""Locomotion environments for legged robots.
+"""Locomotion environments for legged robots."""
-These environments are based on the `legged_gym` environments provided by Rudin et al.
+from .velocity import *  # noqa
-Reference:
-    https://github.com/leggedrobotics/legged_gym
-"""
-import gym
-from .locomotion_env_cfg import LocomotionEnvRoughCfg, LocomotionEnvRoughCfg_PLAY
-__all__ = ["LocomotionEnvRoughCfg", "LocomotionEnvRoughCfg_PLAY"]
-##
-# Register Gym environments.
-##
-gym.register(
-    id="Isaac-Velocity-Rough-Anymal-C-v0",
-    entry_point="omni.isaac.orbit.envs.rl_env:RLEnv",
-    kwargs={"cfg_entry_point": "omni.isaac.orbit_envs.locomotion:LocomotionEnvRoughCfg"},
-)
-gym.register(
-    id="Isaac-Velocity-Rough-Anymal-C-Play-v0",
-    entry_point="omni.isaac.orbit.envs.rl_env:RLEnv",
-    kwargs={"cfg_entry_point": "omni.isaac.orbit_envs.locomotion:LocomotionEnvRoughCfg_PLAY"},
-)
--- a/source/extensions/omni.isaac.orbit_envs/omni/isaac/orbit_envs/locomotion/velocity/__init__.py
+++ b/source/extensions/omni.isaac.orbit_envs/omni/isaac/orbit_envs/locomotion/velocity/__init__.py
+# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES, ETH Zurich, and University of Toronto
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+"""Locomotion environments with velocity-tracking commands.
+These environments are based on the `legged_gym` environments provided by Rudin et al.
+Reference:
+    https://github.com/leggedrobotics/legged_gym
+"""
--- a/source/extensions/omni.isaac.orbit_envs/omni/isaac/orbit_envs/locomotion/velocity/config/__init__.py
+++ b/source/extensions/omni.isaac.orbit_envs/omni/isaac/orbit_envs/locomotion/velocity/config/__init__.py
+# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES, ETH Zurich, and University of Toronto
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+"""Configurations for velocity-based locomotion environments."""
+# We leave this file empty since we don't want to expose any configs in this package directly.
+# We still need this file to import the "config" module in the parent package.
--- a/source/extensions/omni.isaac.orbit_envs/omni/isaac/orbit_envs/locomotion/velocity/config/anymal_b/__init__.py
+++ b/source/extensions/omni.isaac.orbit_envs/omni/isaac/orbit_envs/locomotion/velocity/config/anymal_b/__init__.py
+# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES, ETH Zurich, and University of Toronto
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+import gym
+from . import agents, flat_env_cfg, rough_env_cfg
+##
+# Register Gym environments.
+##
+gym.register(
+    id="Isaac-Velocity-Flat-Anymal-B-v0",
+    entry_point="omni.isaac.orbit.envs.rl_env:RLEnv",
+    kwargs={
+        "env_cfg_entry_point": flat_env_cfg.AnymalBFlatEnvCfg,
+        "rsl_rl_cfg_entry_point": agents.rsl_rl_cfg.AnymalBFlatPPORunnerCfg,
+    },
+)
+gym.register(
+    id="Isaac-Velocity-Flat-Anymal-B-Play-v0",
+    entry_point="omni.isaac.orbit.envs.rl_env:RLEnv",
+    kwargs={
+        "env_cfg_entry_point": flat_env_cfg.AnymalBFlatEnvCfg_PLAY,
+        "rsl_rl_cfg_entry_point": agents.rsl_rl_cfg.AnymalBFlatPPORunnerCfg,
+    },
+)
+gym.register(
+    id="Isaac-Velocity-Rough-Anymal-B-v0",
+    entry_point="omni.isaac.orbit.envs.rl_env:RLEnv",
+    kwargs={
+        "env_cfg_entry_point": rough_env_cfg.AnymalBRoughEnvCfg,
+        "rsl_rl_cfg_entry_point": agents.rsl_rl_cfg.AnymalBRoughPPORunnerCfg,
+    },
+)
+gym.register(
+    id="Isaac-Velocity-Rough-Anymal-B-Play-v0",
+    entry_point="omni.isaac.orbit.envs.rl_env:RLEnv",
+    kwargs={
+        "env_cfg_entry_point": rough_env_cfg.AnymalBRoughEnvCfg_PLAY,
+        "rsl_rl_cfg_entry_point": agents.rsl_rl_cfg.AnymalBRoughPPORunnerCfg,
+    },
+)
--- a/source/extensions/omni.isaac.orbit_envs/omni/isaac/orbit_envs/locomotion/velocity/config/anymal_b/agents/__init__.py
+++ b/source/extensions/omni.isaac.orbit_envs/omni/isaac/orbit_envs/locomotion/velocity/config/anymal_b/agents/__init__.py
+# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES, ETH Zurich, and University of Toronto
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+from . import rsl_rl_cfg  # noqa: F401, F403
--- a/source/extensions/omni.isaac.orbit_envs/omni/isaac/orbit_envs/locomotion/velocity/config/anymal_b/agents/rsl_rl_cfg.py
+++ b/source/extensions/omni.isaac.orbit_envs/omni/isaac/orbit_envs/locomotion/velocity/config/anymal_b/agents/rsl_rl_cfg.py
+# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES, ETH Zurich, and University of Toronto
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+from omni.isaac.orbit.utils import configclass
+from omni.isaac.orbit_envs.utils.wrappers.rsl_rl import (
+    RslRlOnPolicyRunnerCfg,
+    RslRlPpoActorCriticCfg,
+    RslRlPpoAlgorithmCfg,
+)
+@configclass
+class AnymalBRoughPPORunnerCfg(RslRlOnPolicyRunnerCfg):
+    num_steps_per_env = 24
+    max_iterations = 1500
+    save_interval = 50
+    experiment_name = "anymal_b_rough"
+    empirical_normalization = False
+    policy = RslRlPpoActorCriticCfg(
+        init_noise_std=1.0,
+        actor_hidden_dims=[512, 256, 128],
+        critic_hidden_dims=[512, 256, 128],
+        activation="elu",
+    )
+    algorithm = RslRlPpoAlgorithmCfg(
+        value_loss_coef=1.0,
+        use_clipped_value_loss=True,
+        clip_param=0.2,
+        entropy_coef=0.005,
+        num_learning_epochs=5,
+        num_mini_batches=4,
+        learning_rate=1.0e-3,
+        schedule="adaptive",
+        gamma=0.99,
+        lam=0.95,
+        desired_kl=0.01,
+        max_grad_norm=1.0,
+    )
+@configclass
+class AnymalBFlatPPORunnerCfg(AnymalBRoughPPORunnerCfg):
+    def __post_init__(self):
+        super().__post_init__()
+        self.max_iterations = 300
+        self.experiment_name = "anymal_b_flat"
+        self.policy.actor_hidden_dims = [128, 128, 128]
+        self.policy.critic_hidden_dims = [128, 128, 128]
--- a/source/extensions/omni.isaac.orbit_envs/omni/isaac/orbit_envs/locomotion/velocity/config/anymal_b/flat_env_cfg.py
+++ b/source/extensions/omni.isaac.orbit_envs/omni/isaac/orbit_envs/locomotion/velocity/config/anymal_b/flat_env_cfg.py
+# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES, ETH Zurich, and University of Toronto
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+from omni.isaac.orbit.utils import configclass
+from omni.isaac.orbit_envs.locomotion.velocity.velocity_env_cfg import LocomotionVelocityRoughEnvCfg
+##
+# Pre-defined configs
+##
+# isort: off
+from omni.isaac.orbit.assets.config.anymal import ANYMAL_B_CFG
+@configclass
+class AnymalBFlatEnvCfg(LocomotionVelocityRoughEnvCfg):
+    def __post_init__(self):
+        # post init of parent
+        super().__post_init__()
+        # switch robot to anymal-d
+        self.scene.robot = ANYMAL_B_CFG.replace(prim_path="{ENV_REGEX_NS}/Robot")
+        # override rewards
+        self.rewards.flat_orientation_l2.weight = -5.0
+        self.rewards.dof_torques_l2.weight = -2.5e-5
+        self.rewards.feet_air_time.weight = 2.0
+        # change terrain to flat
+        self.scene.terrain.terrain_type = "plane"
+        self.scene.terrain.terrain_generator = None
+        # no height scan
+        self.scene.height_scanner = None
+        self.observations.policy.height_scan = None
+        # no terrain curriculum
+        self.curriculum.terrain_levels = None
+class AnymalBFlatEnvCfg_PLAY(AnymalBFlatEnvCfg):
+    def __post_init__(self) -> None:
+        # post init of parent
+        super().__post_init__()
+        # make a smaller scene for play
+        self.scene.num_envs = 50
+        self.scene.env_spacing = 2.5
+        # disable randomization for play
+        self.observations.policy.enable_corruption = False
+        # remove random pushing
+        self.randomization.base_external_force_torque = None
+        self.randomization.push_robot = None
--- a/source/extensions/omni.isaac.orbit_envs/omni/isaac/orbit_envs/locomotion/velocity/config/anymal_b/rough_env_cfg.py
+++ b/source/extensions/omni.isaac.orbit_envs/omni/isaac/orbit_envs/locomotion/velocity/config/anymal_b/rough_env_cfg.py
+# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES, ETH Zurich, and University of Toronto
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+from omni.isaac.orbit.utils import configclass
+from omni.isaac.orbit_envs.locomotion.velocity.velocity_env_cfg import LocomotionVelocityRoughEnvCfg
+##
+# Pre-defined configs
+##
+# isort: off
+from omni.isaac.orbit.assets.config.anymal import ANYMAL_B_CFG
+@configclass
+class AnymalBRoughEnvCfg(LocomotionVelocityRoughEnvCfg):
+    def __post_init__(self):
+        # post init of parent
+        super().__post_init__()
+        # switch robot to anymal-d
+        self.scene.robot = ANYMAL_B_CFG.replace(prim_path="{ENV_REGEX_NS}/Robot")
+@configclass
+class AnymalBRoughEnvCfg_PLAY(AnymalBRoughEnvCfg):
+    def __post_init__(self):
+        # post init of parent
+        super().__post_init__()
+        # make a smaller scene for play
+        self.scene.num_envs = 50
+        self.scene.env_spacing = 2.5
+        # spawn the robot randomly in the grid (instead of their terrain levels)
+        self.scene.terrain.max_init_terrain_level = None
+        # reduce the number of terrains to save memory
+        if self.scene.terrain.terrain_generator is not None:
+            self.scene.terrain.terrain_generator.num_rows = 5
+            self.scene.terrain.terrain_generator.num_cols = 5
+            self.scene.terrain.terrain_generator.curriculum = False
+        # disable randomization for play
+        self.observations.policy.enable_corruption = False
+        # remove random pushing
+        self.randomization.base_external_force_torque = None
+        self.randomization.push_robot = None
--- a/source/extensions/omni.isaac.orbit_envs/omni/isaac/orbit_envs/locomotion/velocity/config/anymal_c/__init__.py
+++ b/source/extensions/omni.isaac.orbit_envs/omni/isaac/orbit_envs/locomotion/velocity/config/anymal_c/__init__.py
+# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES, ETH Zurich, and University of Toronto
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+import gym
+from . import agents, flat_env_cfg, rough_env_cfg
+##
+# Register Gym environments.
+##
+gym.register(
+    id="Isaac-Velocity-Flat-Anymal-C-v0",
+    entry_point="omni.isaac.orbit.envs.rl_env:RLEnv",
+    kwargs={
+        "env_cfg_entry_point": flat_env_cfg.AnymalCFlatEnvCfg,
+        "rsl_rl_cfg_entry_point": agents.rsl_rl_cfg.AnymalCFlatPPORunnerCfg,
+        "skrl_cfg_entry_point": "omni.isaac.orbit_envs.locomotion.velocity.anymal_c.agents:skrl_cfg.yaml",
+    },
+)
+gym.register(
+    id="Isaac-Velocity-Flat-Anymal-C-Play-v0",
+    entry_point="omni.isaac.orbit.envs.rl_env:RLEnv",
+    kwargs={
+        "env_cfg_entry_point": flat_env_cfg.AnymalCFlatEnvCfg_PLAY,
+        "rsl_rl_cfg_entry_point": agents.rsl_rl_cfg.AnymalCFlatPPORunnerCfg,
+    },
+)
+gym.register(
+    id="Isaac-Velocity-Rough-Anymal-C-v0",
+    entry_point="omni.isaac.orbit.envs.rl_env:RLEnv",
+    kwargs={
+        "env_cfg_entry_point": rough_env_cfg.AnymalCRoughEnvCfg,
+        "rsl_rl_cfg_entry_point": agents.rsl_rl_cfg.AnymalCRoughPPORunnerCfg,
+    },
+)
+gym.register(
+    id="Isaac-Velocity-Rough-Anymal-C-Play-v0",
+    entry_point="omni.isaac.orbit.envs.rl_env:RLEnv",
+    kwargs={
+        "env_cfg_entry_point": rough_env_cfg.AnymalCRoughEnvCfg_PLAY,
+        "rsl_rl_cfg_entry_point": agents.rsl_rl_cfg.AnymalCRoughPPORunnerCfg,
+    },
+)
--- a/source/extensions/omni.isaac.orbit_envs/omni/isaac/orbit_envs/locomotion/velocity/config/anymal_c/agents/__init__.py
+++ b/source/extensions/omni.isaac.orbit_envs/omni/isaac/orbit_envs/locomotion/velocity/config/anymal_c/agents/__init__.py
+# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES, ETH Zurich, and University of Toronto
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+from . import rsl_rl_cfg  # noqa: F401, F403
--- a/source/extensions/omni.isaac.orbit_envs/omni/isaac/orbit_envs/locomotion/velocity/config/anymal_c/agents/rsl_rl_cfg.py
+++ b/source/extensions/omni.isaac.orbit_envs/omni/isaac/orbit_envs/locomotion/velocity/config/anymal_c/agents/rsl_rl_cfg.py
+# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES, ETH Zurich, and University of Toronto
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+from omni.isaac.orbit.utils import configclass
+from omni.isaac.orbit_envs.utils.wrappers.rsl_rl import (
+    RslRlOnPolicyRunnerCfg,
+    RslRlPpoActorCriticCfg,
+    RslRlPpoAlgorithmCfg,
+)
+@configclass
+class AnymalCRoughPPORunnerCfg(RslRlOnPolicyRunnerCfg):
+    num_steps_per_env = 24
+    max_iterations = 1500
+    save_interval = 50
+    experiment_name = "anymal_c_rough"
+    empirical_normalization = False
+    policy = RslRlPpoActorCriticCfg(
+        init_noise_std=1.0,
+        actor_hidden_dims=[512, 256, 128],
+        critic_hidden_dims=[512, 256, 128],
+        activation="elu",
+    )
+    algorithm = RslRlPpoAlgorithmCfg(
+        value_loss_coef=1.0,
+        use_clipped_value_loss=True,
+        clip_param=0.2,
+        entropy_coef=0.005,
+        num_learning_epochs=5,
+        num_mini_batches=4,
+        learning_rate=1.0e-3,
+        schedule="adaptive",
+        gamma=0.99,
+        lam=0.95,
+        desired_kl=0.01,
+        max_grad_norm=1.0,
+    )
+@configclass
+class AnymalCFlatPPORunnerCfg(AnymalCRoughPPORunnerCfg):
+    def __post_init__(self):
+        super().__post_init__()
+        self.max_iterations = 300
+        self.experiment_name = "anymal_c_flat"
+        self.policy.actor_hidden_dims = [128, 128, 128]
+        self.policy.critic_hidden_dims = [128, 128, 128]
--- a/source/extensions/omni.isaac.orbit_envs/data/skrl/anymal_ppo.yaml
+++ b/source/extensions/omni.isaac.orbit_envs/data/skrl/anymal_ppo.yaml
--- a/source/extensions/omni.isaac.orbit_envs/omni/isaac/orbit_envs/locomotion/velocity/config/anymal_c/flat_env_cfg.py
+++ b/source/extensions/omni.isaac.orbit_envs/omni/isaac/orbit_envs/locomotion/velocity/config/anymal_c/flat_env_cfg.py
+# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES, ETH Zurich, and University of Toronto
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+from omni.isaac.orbit.utils import configclass
+from omni.isaac.orbit_envs.locomotion.velocity.velocity_env_cfg import LocomotionVelocityRoughEnvCfg
+##
+# Pre-defined configs
+##
+# isort: off
+from omni.isaac.orbit.assets.config.anymal import ANYMAL_C_CFG
+@configclass
+class AnymalCFlatEnvCfg(LocomotionVelocityRoughEnvCfg):
+    def __post_init__(self):
+        # post init of parent
+        super().__post_init__()
+        # switch robot to anymal-c
+        self.scene.robot = ANYMAL_C_CFG.replace(prim_path="{ENV_REGEX_NS}/Robot")
+        # override rewards
+        self.rewards.flat_orientation_l2.weight = -5.0
+        self.rewards.dof_torques_l2.weight = -2.5e-5
+        self.rewards.feet_air_time.weight = 2.0
+        # change terrain to flat
+        self.scene.terrain.terrain_type = "plane"
+        self.scene.terrain.terrain_generator = None
+        # no height scan
+        self.scene.height_scanner = None
+        self.observations.policy.height_scan = None
+        # no terrain curriculum
+        self.curriculum.terrain_levels = None
+class AnymalCFlatEnvCfg_PLAY(AnymalCFlatEnvCfg):
+    def __post_init__(self) -> None:
+        # post init of parent
+        super().__post_init__()
+        # make a smaller scene for play
+        self.scene.num_envs = 50
+        self.scene.env_spacing = 2.5
+        # disable randomization for play
+        self.observations.policy.enable_corruption = False
+        # remove random pushing
+        self.randomization.base_external_force_torque = None
+        self.randomization.push_robot = None
--- a/source/extensions/omni.isaac.orbit_envs/omni/isaac/orbit_envs/locomotion/velocity/config/anymal_c/rough_env_cfg.py
+++ b/source/extensions/omni.isaac.orbit_envs/omni/isaac/orbit_envs/locomotion/velocity/config/anymal_c/rough_env_cfg.py
+# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES, ETH Zurich, and University of Toronto
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+from omni.isaac.orbit.utils import configclass
+from omni.isaac.orbit_envs.locomotion.velocity.velocity_env_cfg import LocomotionVelocityRoughEnvCfg
+##
+# Pre-defined configs
+##
+# isort: off
+from omni.isaac.orbit.assets.config.anymal import ANYMAL_C_CFG
+@configclass
+class AnymalCRoughEnvCfg(LocomotionVelocityRoughEnvCfg):
+    def __post_init__(self):
+        # post init of parent
+        super().__post_init__()
+        # switch robot to anymal-c
+        self.scene.robot = ANYMAL_C_CFG.replace(prim_path="{ENV_REGEX_NS}/Robot")
+@configclass
+class AnymalCRoughEnvCfg_PLAY(AnymalCRoughEnvCfg):
+    def __post_init__(self):
+        # post init of parent
+        super().__post_init__()
+        # make a smaller scene for play
+        self.scene.num_envs = 50
+        self.scene.env_spacing = 2.5
+        # spawn the robot randomly in the grid (instead of their terrain levels)
+        self.scene.terrain.max_init_terrain_level = None
+        # reduce the number of terrains to save memory
+        if self.scene.terrain.terrain_generator is not None:
+            self.scene.terrain.terrain_generator.num_rows = 5
+            self.scene.terrain.terrain_generator.num_cols = 5
+            self.scene.terrain.terrain_generator.curriculum = False
+        # disable randomization for play
+        self.observations.policy.enable_corruption = False
+        # remove random pushing
+        self.randomization.base_external_force_torque = None
+        self.randomization.push_robot = None
--- a/source/extensions/omni.isaac.orbit_envs/omni/isaac/orbit_envs/locomotion/velocity/config/anymal_d/__init__.py
+++ b/source/extensions/omni.isaac.orbit_envs/omni/isaac/orbit_envs/locomotion/velocity/config/anymal_d/__init__.py
+# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES, ETH Zurich, and University of Toronto
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+import gym
+from . import agents, flat_env_cfg, rough_env_cfg
+##
+# Register Gym environments.
+##
+gym.register(
+    id="Isaac-Velocity-Flat-Anymal-D-v0",
+    entry_point="omni.isaac.orbit.envs.rl_env:RLEnv",
+    kwargs={
+        "env_cfg_entry_point": flat_env_cfg.AnymalDFlatEnvCfg,
+        "rsl_rl_cfg_entry_point": agents.rsl_rl_cfg.AnymalDFlatPPORunnerCfg,
+    },
+)
+gym.register(
+    id="Isaac-Velocity-Flat-Anymal-D-Play-v0",
+    entry_point="omni.isaac.orbit.envs.rl_env:RLEnv",
+    kwargs={
+        "env_cfg_entry_point": flat_env_cfg.AnymalDFlatEnvCfg_PLAY,
+        "rsl_rl_cfg_entry_point": agents.rsl_rl_cfg.AnymalDFlatPPORunnerCfg,
+    },
+)
+gym.register(
+    id="Isaac-Velocity-Rough-Anymal-D-v0",
+    entry_point="omni.isaac.orbit.envs.rl_env:RLEnv",
+    kwargs={
+        "env_cfg_entry_point": rough_env_cfg.AnymalDRoughEnvCfg,
+        "rsl_rl_cfg_entry_point": agents.rsl_rl_cfg.AnymalDRoughPPORunnerCfg,
+    },
+)
+gym.register(
+    id="Isaac-Velocity-Rough-Anymal-D-Play-v0",
+    entry_point="omni.isaac.orbit.envs.rl_env:RLEnv",
+    kwargs={
+        "env_cfg_entry_point": rough_env_cfg.AnymalDRoughEnvCfg_PLAY,
+        "rsl_rl_cfg_entry_point": agents.rsl_rl_cfg.AnymalDRoughPPORunnerCfg,
+    },
+)
--- a/source/extensions/omni.isaac.orbit_envs/omni/isaac/orbit_envs/locomotion/velocity/config/anymal_d/agents/__init__.py
+++ b/source/extensions/omni.isaac.orbit_envs/omni/isaac/orbit_envs/locomotion/velocity/config/anymal_d/agents/__init__.py
+# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES, ETH Zurich, and University of Toronto
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+from . import rsl_rl_cfg  # noqa: F401, F403
--- a/source/extensions/omni.isaac.orbit_envs/omni/isaac/orbit_envs/locomotion/velocity/config/anymal_d/agents/rsl_rl_cfg.py
+++ b/source/extensions/omni.isaac.orbit_envs/omni/isaac/orbit_envs/locomotion/velocity/config/anymal_d/agents/rsl_rl_cfg.py
+# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES, ETH Zurich, and University of Toronto
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+from omni.isaac.orbit.utils import configclass
+from omni.isaac.orbit_envs.utils.wrappers.rsl_rl import (
+    RslRlOnPolicyRunnerCfg,
+    RslRlPpoActorCriticCfg,
+    RslRlPpoAlgorithmCfg,
+)
+@configclass
+class AnymalDRoughPPORunnerCfg(RslRlOnPolicyRunnerCfg):
+    num_steps_per_env = 24
+    max_iterations = 1500
+    save_interval = 50
+    experiment_name = "anymal_d_rough"
+    empirical_normalization = False
+    policy = RslRlPpoActorCriticCfg(
+        init_noise_std=1.0,
+        actor_hidden_dims=[512, 256, 128],
+        critic_hidden_dims=[512, 256, 128],
+        activation="elu",
+    )
+    algorithm = RslRlPpoAlgorithmCfg(
+        value_loss_coef=1.0,
+        use_clipped_value_loss=True,
+        clip_param=0.2,
+        entropy_coef=0.005,
+        num_learning_epochs=5,
+        num_mini_batches=4,
+        learning_rate=1.0e-3,
+        schedule="adaptive",
+        gamma=0.99,
+        lam=0.95,
+        desired_kl=0.01,
+        max_grad_norm=1.0,
+    )
+@configclass
+class AnymalDFlatPPORunnerCfg(AnymalDRoughPPORunnerCfg):
+    def __post_init__(self):
+        super().__post_init__()
+        self.max_iterations = 300
+        self.experiment_name = "anymal_d_flat"
+        self.policy.actor_hidden_dims = [128, 128, 128]
+        self.policy.critic_hidden_dims = [128, 128, 128]
--- a/source/extensions/omni.isaac.orbit_envs/omni/isaac/orbit_envs/locomotion/velocity/config/anymal_d/flat_env_cfg.py
+++ b/source/extensions/omni.isaac.orbit_envs/omni/isaac/orbit_envs/locomotion/velocity/config/anymal_d/flat_env_cfg.py
+# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES, ETH Zurich, and University of Toronto
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+from omni.isaac.orbit.utils import configclass
+from omni.isaac.orbit_envs.locomotion.velocity.velocity_env_cfg import LocomotionVelocityRoughEnvCfg
+##
+# Pre-defined configs
+##
+# isort: off
+from omni.isaac.orbit.assets.config.anymal import ANYMAL_D_CFG
+@configclass
+class AnymalDFlatEnvCfg(LocomotionVelocityRoughEnvCfg):
+    def __post_init__(self):
+        # post init of parent
+        super().__post_init__()
+        # switch robot to anymal-d
+        self.scene.robot = ANYMAL_D_CFG.replace(prim_path="{ENV_REGEX_NS}/Robot")
+        # override rewards
+        self.rewards.flat_orientation_l2.weight = -5.0
+        self.rewards.dof_torques_l2.weight = -2.5e-5
+        self.rewards.feet_air_time.weight = 2.0
+        # change terrain to flat
+        self.scene.terrain.terrain_type = "plane"
+        self.scene.terrain.terrain_generator = None
+        # no height scan
+        self.scene.height_scanner = None
+        self.observations.policy.height_scan = None
+        # no terrain curriculum
+        self.curriculum.terrain_levels = None
+class AnymalDFlatEnvCfg_PLAY(AnymalDFlatEnvCfg):
+    def __post_init__(self) -> None:
+        # post init of parent
+        super().__post_init__()
+        # make a smaller scene for play
+        self.scene.num_envs = 50
+        self.scene.env_spacing = 2.5
+        # disable randomization for play
+        self.observations.policy.enable_corruption = False
+        # remove random pushing
+        self.randomization.base_external_force_torque = None
+        self.randomization.push_robot = None
--- a/source/extensions/omni.isaac.orbit_envs/omni/isaac/orbit_envs/locomotion/velocity/config/anymal_d/rough_env_cfg.py
+++ b/source/extensions/omni.isaac.orbit_envs/omni/isaac/orbit_envs/locomotion/velocity/config/anymal_d/rough_env_cfg.py
+# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES, ETH Zurich, and University of Toronto
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+from omni.isaac.orbit.utils import configclass
+from omni.isaac.orbit_envs.locomotion.velocity.velocity_env_cfg import LocomotionVelocityRoughEnvCfg
+##
+# Pre-defined configs
+##
+# isort: off
+from omni.isaac.orbit.assets.config.anymal import ANYMAL_D_CFG
+@configclass
+class AnymalDRoughEnvCfg(LocomotionVelocityRoughEnvCfg):
+    def __post_init__(self):
+        # post init of parent
+        super().__post_init__()
+        # switch robot to anymal-d
+        self.scene.robot = ANYMAL_D_CFG.replace(prim_path="{ENV_REGEX_NS}/Robot")
+@configclass
+class AnymalDRoughEnvCfg_PLAY(AnymalDRoughEnvCfg):
+    def __post_init__(self):
+        # post init of parent
+        super().__post_init__()
+        # make a smaller scene for play
+        self.scene.num_envs = 50
+        self.scene.env_spacing = 2.5
+        # spawn the robot randomly in the grid (instead of their terrain levels)
+        self.scene.terrain.max_init_terrain_level = None
+        # reduce the number of terrains to save memory
+        if self.scene.terrain.terrain_generator is not None:
+            self.scene.terrain.terrain_generator.num_rows = 5
+            self.scene.terrain.terrain_generator.num_cols = 5
+            self.scene.terrain.terrain_generator.curriculum = False
+        # disable randomization for play
+        self.observations.policy.enable_corruption = False
+        # remove random pushing
+        self.randomization.base_external_force_torque = None
+        self.randomization.push_robot = None
--- a/source/extensions/omni.isaac.orbit_envs/omni/isaac/orbit_envs/locomotion/velocity/config/unitree_a1/__init__.py
+++ b/source/extensions/omni.isaac.orbit_envs/omni/isaac/orbit_envs/locomotion/velocity/config/unitree_a1/__init__.py
+# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES, ETH Zurich, and University of Toronto
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+import gym
+from . import agents, flat_env_cfg, rough_env_cfg
+##
+# Register Gym environments.
+##
+gym.register(
+    id="Isaac-Velocity-Flat-Unitree-A1-v0",
+    entry_point="omni.isaac.orbit.envs.rl_env:RLEnv",
+    kwargs={
+        "env_cfg_entry_point": flat_env_cfg.UnitreeA1FlatEnvCfg,
+        "rsl_rl_cfg_entry_point": agents.rsl_rl_cfg.UnitreeA1FlatPPORunnerCfg,
+    },
+)
+gym.register(
+    id="Isaac-Velocity-Flat-Unitree-A1-Play-v0",
+    entry_point="omni.isaac.orbit.envs.rl_env:RLEnv",
+    kwargs={
+        "env_cfg_entry_point": flat_env_cfg.UnitreeA1FlatEnvCfg_PLAY,
+        "rsl_rl_cfg_entry_point": agents.rsl_rl_cfg.UnitreeA1FlatPPORunnerCfg,
+    },
+)
+gym.register(
+    id="Isaac-Velocity-Rough-Unitree-A1-v0",
+    entry_point="omni.isaac.orbit.envs.rl_env:RLEnv",
+    kwargs={
+        "env_cfg_entry_point": rough_env_cfg.UnitreeA1RoughEnvCfg,
+        "rsl_rl_cfg_entry_point": agents.rsl_rl_cfg.UnitreeA1RoughPPORunnerCfg,
+    },
+)
+gym.register(
+    id="Isaac-Velocity-Rough-Unitree-A1-Play-v0",
+    entry_point="omni.isaac.orbit.envs.rl_env:RLEnv",
+    kwargs={
+        "env_cfg_entry_point": rough_env_cfg.UnitreeA1RoughEnvCfg_PLAY,
+        "rsl_rl_cfg_entry_point": agents.rsl_rl_cfg.UnitreeA1RoughPPORunnerCfg,
+    },
+)
--- a/source/extensions/omni.isaac.orbit_envs/omni/isaac/orbit_envs/locomotion/velocity/config/unitree_a1/agents/__init__.py
+++ b/source/extensions/omni.isaac.orbit_envs/omni/isaac/orbit_envs/locomotion/velocity/config/unitree_a1/agents/__init__.py
+# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES, ETH Zurich, and University of Toronto
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+from . import rsl_rl_cfg  # noqa: F401, F403
--- a/source/extensions/omni.isaac.orbit_envs/omni/isaac/orbit_envs/locomotion/velocity/config/unitree_a1/agents/rsl_rl_cfg.py
+++ b/source/extensions/omni.isaac.orbit_envs/omni/isaac/orbit_envs/locomotion/velocity/config/unitree_a1/agents/rsl_rl_cfg.py
+# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES, ETH Zurich, and University of Toronto
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+from omni.isaac.orbit.utils import configclass
+from omni.isaac.orbit_envs.utils.wrappers.rsl_rl import (
+    RslRlOnPolicyRunnerCfg,
+    RslRlPpoActorCriticCfg,
+    RslRlPpoAlgorithmCfg,
+)
+@configclass
+class UnitreeA1RoughPPORunnerCfg(RslRlOnPolicyRunnerCfg):
+    num_steps_per_env = 24
+    max_iterations = 1500
+    save_interval = 50
+    experiment_name = "unitree_a1_rough"
+    empirical_normalization = False
+    policy = RslRlPpoActorCriticCfg(
+        init_noise_std=1.0,
+        actor_hidden_dims=[512, 256, 128],
+        critic_hidden_dims=[512, 256, 128],
+        activation="elu",
+    )
+    algorithm = RslRlPpoAlgorithmCfg(
+        value_loss_coef=1.0,
+        use_clipped_value_loss=True,
+        clip_param=0.2,
+        entropy_coef=0.01,
+        num_learning_epochs=5,
+        num_mini_batches=4,
+        learning_rate=1.0e-3,
+        schedule="adaptive",
+        gamma=0.99,
+        lam=0.95,
+        desired_kl=0.01,
+        max_grad_norm=1.0,
+    )
+@configclass
+class UnitreeA1FlatPPORunnerCfg(UnitreeA1RoughPPORunnerCfg):
+    def __post_init__(self):
+        super().__post_init__()
+        self.max_iterations = 300
+        self.experiment_name = "unitree_a1_flat"
+        self.policy.actor_hidden_dims = [128, 128, 128]
+        self.policy.critic_hidden_dims = [128, 128, 128]
--- a/source/extensions/omni.isaac.orbit_envs/omni/isaac/orbit_envs/locomotion/velocity/config/unitree_a1/flat_env_cfg.py
+++ b/source/extensions/omni.isaac.orbit_envs/omni/isaac/orbit_envs/locomotion/velocity/config/unitree_a1/flat_env_cfg.py
+# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES, ETH Zurich, and University of Toronto
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+from omni.isaac.orbit.utils import configclass
+from omni.isaac.orbit_envs.locomotion.velocity.velocity_env_cfg import LocomotionVelocityRoughEnvCfg
+##
+# Pre-defined configs
+##
+# isort: off
+from omni.isaac.orbit.assets.config.unitree import UNITREE_A1_CFG
+@configclass
+class UnitreeA1FlatEnvCfg(LocomotionVelocityRoughEnvCfg):
+    def __post_init__(self):
+        # post init of parent
+        super().__post_init__()
+        # switch robot to unitree a1
+        self.scene.robot = UNITREE_A1_CFG.replace(prim_path="{ENV_REGEX_NS}/Robot")
+        # reduce action scale
+        self.actions.joint_pos.scale = 0.25
+        # override rewards
+        self.rewards.flat_orientation_l2.weight = -2.5
+        self.rewards.feet_air_time.weight = 1.0
+        # self.rewards.dof_torques_l2.weight = -2.0e-4
+        # self.rewards.dof_pos_limits.weight = -10.0
+        # change terrain to flat
+        self.scene.terrain.terrain_type = "plane"
+        self.scene.terrain.terrain_generator = None
+        # no height scan
+        self.scene.height_scanner = None
+        self.observations.policy.height_scan = None
+        # no terrain curriculum
+        self.curriculum.terrain_levels = None
+        # disable pushing for now
+        self.randomization.push_robot = None
+        self.randomization.add_base_mass = None
+        # change body and joint names
+        # TODO: Change to .*foot once we make a new USD for the robot
+        self.rewards.feet_air_time.params["sensor_cfg"].body_names = ".*calf"
+        self.rewards.undesired_contacts.params["sensor_cfg"].body_names = [".*thigh", ".*hip"]
+class UnitreeA1FlatEnvCfg_PLAY(UnitreeA1FlatEnvCfg):
+    def __post_init__(self) -> None:
+        # post init of parent
+        super().__post_init__()
+        # make a smaller scene for play
+        self.scene.num_envs = 50
+        self.scene.env_spacing = 2.5
+        # disable randomization for play
+        self.observations.policy.enable_corruption = False
+        # remove random pushing
+        self.randomization.base_external_force_torque = None
+        self.randomization.push_robot = None
--- a/source/extensions/omni.isaac.orbit_envs/omni/isaac/orbit_envs/locomotion/velocity/config/unitree_a1/rough_env_cfg.py
+++ b/source/extensions/omni.isaac.orbit_envs/omni/isaac/orbit_envs/locomotion/velocity/config/unitree_a1/rough_env_cfg.py
+# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES, ETH Zurich, and University of Toronto
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+from omni.isaac.orbit.utils import configclass
+from omni.isaac.orbit_envs.locomotion.velocity.velocity_env_cfg import LocomotionVelocityRoughEnvCfg
+##
+# Pre-defined configs
+##
+# isort: off
+from omni.isaac.orbit.assets.config.unitree import UNITREE_A1_CFG
+@configclass
+class UnitreeA1RoughEnvCfg(LocomotionVelocityRoughEnvCfg):
+    def __post_init__(self):
+        # post init of parent
+        super().__post_init__()
+        # switch robot to unitree-a1
+        self.scene.robot = UNITREE_A1_CFG.replace(prim_path="{ENV_REGEX_NS}/Robot")
+        # reduce action scale
+        self.actions.joint_pos.scale = 0.25
+        # override rewards
+        # self.rewards.dof_torques_l2.weight = -2.0e-4
+        # self.rewards.dof_pos_limits.weight = -10.0
+        # disable pushing for now
+        self.randomization.push_robot = None
+        self.randomization.add_base_mass = None
+        # change body and joint names
+        self.rewards.feet_air_time.params["sensor_cfg"].body_names = ".*calf"  # TODO: Change to .*foot
+        self.rewards.undesired_contacts.params["sensor_cfg"].body_names = [".*thigh", ".*hip"]
+@configclass
+class UnitreeA1RoughEnvCfg_PLAY(UnitreeA1RoughEnvCfg):
+    def __post_init__(self):
+        # post init of parent
+        super().__post_init__()
+        # make a smaller scene for play
+        self.scene.num_envs = 50
+        self.scene.env_spacing = 2.5
+        # spawn the robot randomly in the grid (instead of their terrain levels)
+        self.scene.terrain.max_init_terrain_level = None
+        # reduce the number of terrains to save memory
+        if self.scene.terrain.terrain_generator is not None:
+            self.scene.terrain.terrain_generator.num_rows = 5
+            self.scene.terrain.terrain_generator.num_cols = 5
+            self.scene.terrain.terrain_generator.curriculum = False
+        # disable randomization for play
+        self.observations.policy.enable_corruption = False
+        # remove random pushing
+        self.randomization.base_external_force_torque = None
+        self.randomization.push_robot = None
--- a/source/extensions/omni.isaac.orbit_envs/omni/isaac/orbit_envs/locomotion/mdp/__init__.py
+++ b/source/extensions/omni.isaac.orbit_envs/omni/isaac/orbit_envs/locomotion/mdp/__init__.py
--- a/source/extensions/omni.isaac.orbit_envs/omni/isaac/orbit_envs/locomotion/mdp/rewards.py
+++ b/source/extensions/omni.isaac.orbit_envs/omni/isaac/orbit_envs/locomotion/mdp/rewards.py
--- a/source/extensions/omni.isaac.orbit_envs/omni/isaac/orbit_envs/locomotion/locomotion_env_cfg.py
+++ b/source/extensions/omni.isaac.orbit_envs/omni/isaac/orbit_envs/locomotion/locomotion_env_cfg.py
@@ -6,9 +6,10 @@
 from __future__ import annotations
 import math
+from dataclasses import MISSING
 import omni.isaac.orbit.sim as sim_utils
-from omni.isaac.orbit.assets import AssetBaseCfg
+from omni.isaac.orbit.assets import ArticulationCfg, AssetBaseCfg
 from omni.isaac.orbit.command_generators import UniformVelocityCommandGeneratorCfg
 from omni.isaac.orbit.envs import RLEnvCfg
 from omni.isaac.orbit.managers import CurriculumTermCfg as CurrTerm
@@ -24,13 +25,12 @@ from omni.isaac.orbit.terrains import TerrainImporterCfg
 from omni.isaac.orbit.utils import configclass
 from omni.isaac.orbit.utils.noise import AdditiveUniformNoiseCfg as Unoise
-import omni.isaac.orbit_envs.locomotion.mdp as mdp
+import omni.isaac.orbit_envs.locomotion.velocity.mdp as mdp
 ##
 # Pre-defined configs
 ##
 # isort: off
-from omni.isaac.orbit.assets.config.anymal import ANYMAL_C_CFG
 from omni.isaac.orbit.terrains.config.rough import ROUGH_TERRAINS_CFG
@@ -40,7 +40,7 @@ from omni.isaac.orbit.terrains.config.rough import ROUGH_TERRAINS_CFG
 @configclass
-class TerrainSceneCfg(InteractiveSceneCfg):
+class MySceneCfg(InteractiveSceneCfg):
    """Configuration for the terrain scene with a legged robot."""
    # ground terrain
@@ -59,7 +59,7 @@ class TerrainSceneCfg(InteractiveSceneCfg):
        debug_vis=False,
    )
    # robots
-    robot = ANYMAL_C_CFG.replace(prim_path="{ENV_REGEX_NS}/Robot")
+    robot: ArticulationCfg = MISSING
    # sensors
    height_scanner = RayCasterCfg(
        prim_path="{ENV_REGEX_NS}/Robot/base",
@@ -73,10 +73,7 @@ class TerrainSceneCfg(InteractiveSceneCfg):
    # lights
    light = AssetBaseCfg(
        prim_path="/World/light",
-        spawn=sim_utils.DistantLightCfg(
+        spawn=sim_utils.DistantLightCfg(color=(0.75, 0.75, 0.75), intensity=3000.0),
-            color=(1.0, 1.0, 1.0),
-            intensity=1000.0,
-        ),
    )
@@ -217,6 +214,9 @@ class RewardsCfg:
        weight=-1.0,
        params={"sensor_cfg": SceneEntityCfg("contact_forces", body_names=".*THIGH"), "threshold": 1.0},
    )
+    # -- optional penalties
+    flat_orientation_l2 = RewTerm(func=mdp.flat_orientation_l2, weight=0.0)
+    dof_pos_limits = RewTerm(func=mdp.joint_pos_limits, weight=0.0)
 @configclass
@@ -243,11 +243,11 @@ class CurriculumCfg:
 @configclass
-class LocomotionEnvRoughCfg(RLEnvCfg):
+class LocomotionVelocityRoughEnvCfg(RLEnvCfg):
    """Configuration for the locomotion velocity-tracking environment."""
    # Scene settings
-    scene: TerrainSceneCfg = TerrainSceneCfg(num_envs=4096, env_spacing=2.5, replicate_physics=False)
+    scene: MySceneCfg = MySceneCfg(num_envs=4096, env_spacing=2.5, replicate_physics=False)
    # Basic settings
    observations: ObservationsCfg = ObservationsCfg()
    actions: ActionsCfg = ActionsCfg()
@@ -279,8 +279,10 @@ class LocomotionEnvRoughCfg(RLEnvCfg):
        self.sim.physics_material = self.scene.terrain.physics_material
        # update sensor update periods
        # we tick all the sensors based on the smallest update period (physics update period)
-        self.scene.height_scanner.update_period = self.decimation * self.sim.dt
+        if self.scene.height_scanner is not None:
-        self.scene.contact_forces.update_period = self.sim.dt
+            self.scene.height_scanner.update_period = self.decimation * self.sim.dt
+        if self.scene.contact_forces is not None:
+            self.scene.contact_forces.update_period = self.sim.dt
        # check if terrain levels curriculum is enabled - if so, enable curriculum for terrain generator
        # this generates terrains with increasing difficulty and is useful for training
@@ -290,25 +292,3 @@ class LocomotionEnvRoughCfg(RLEnvCfg):
        else:
            if self.scene.terrain.terrain_generator is not None:
                self.scene.terrain.terrain_generator.curriculum = False
-@configclass
-class LocomotionEnvRoughCfg_PLAY(LocomotionEnvRoughCfg):
-    """Configuration for the locomotion velocity environment."""
-    def __post_init__(self) -> None:
-        super().__post_init__()
-        # make a smaller scene for play
-        self.scene.num_envs = 50
-        self.scene.env_spacing = 2.5
-        # spawn the robot randomly in the grid (instead of their terrain levels)
-        self.scene.terrain.max_init_terrain_level = None
-        # reduce the number of terrains to save memory
-        if self.scene.terrain.terrain_generator is not None:
-            self.scene.terrain.terrain_generator.num_rows = 5
-            self.scene.terrain.terrain_generator.num_cols = 5
-            self.scene.terrain.terrain_generator.curriculum = False
-        # disable randomization for play
-        self.observations.policy.enable_corruption = False
--- a/source/extensions/omni.isaac.orbit_envs/data/rl_games/cabinet_ppo.yaml
+++ b/source/extensions/omni.isaac.orbit_envs/data/rl_games/cabinet_ppo.yaml
--- a/source/extensions/omni.isaac.orbit_envs/omni/isaac/orbit_envs/manipulation/cabinet/agents/rsl_rl_ppo_cfg.py
+++ b/source/extensions/omni.isaac.orbit_envs/omni/isaac/orbit_envs/manipulation/cabinet/agents/rsl_rl_ppo_cfg.py
+# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES, ETH Zurich, and University of Toronto
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+# TODO: be moved once CabinetEnv exists
+from omni.isaac.orbit_envs.utils.wrappers.rsl_rl import (
+    RslRlOnPolicyRunnerCfg,
+    RslRlPpoActorCriticCfg,
+    RslRlPpoAlgorithmCfg,
+)
+CABINET_RSL_RL_PPO_CFG = RslRlOnPolicyRunnerCfg(
+    num_steps_per_env=64,
+    max_iterations=200,
+    save_interval=50,
+    experiment_name="cabinet",
+    run_name="",
+    resume=False,
+    load_run=-1,
+    load_checkpoint=-1,
+    empirical_normalization=False,
+    policy=RslRlPpoActorCriticCfg(
+        init_noise_std=1.0,
+        actor_hidden_dims=[256, 128, 64],
+        critic_hidden_dims=[256, 128, 64],
+        activation="elu",
+    ),
+    algorithm=RslRlPpoAlgorithmCfg(
+        value_loss_coef=1.0,
+        use_clipped_value_loss=True,
+        clip_param=0.2,
+        entropy_coef=0.00,
+        num_learning_epochs=8,
+        num_mini_batches=8,
+        learning_rate=5.0e-4,
+        schedule="adaptive",
+        gamma=0.99,
+        lam=0.95,
+        desired_kl=0.008,
+        max_grad_norm=1.0,
+    ),
+)
--- a/source/extensions/omni.isaac.orbit_envs/data/sb3/cabinet_ppo.yaml
+++ b/source/extensions/omni.isaac.orbit_envs/data/sb3/cabinet_ppo.yaml
--- a/source/extensions/omni.isaac.orbit_envs/omni/isaac/orbit_envs/manipulation/lift/__init__.py
+++ b/source/extensions/omni.isaac.orbit_envs/omni/isaac/orbit_envs/manipulation/lift/__init__.py
@@ -3,9 +3,28 @@
 #
 # SPDX-License-Identifier: BSD-3-Clause
-"""Environment for lifting objects with fixed-arm robots."""
+"""
+Environment for lifting an object with fixed-base robot.
+"""
-from .lift_cfg import LiftEnvCfg
+import gym
-from .lift_env import LiftEnv
-__all__ = ["LiftEnv", "LiftEnvCfg"]
+from . import agents
+##
+# Register Gym environments.
+##
+gym.register(
+    id="Isaac-Lift-Franka-v0",
+    entry_point="omni.isaac.orbit.envs.rl_env:RLEnv",
+    kwargs={
+        "env_cfg_entry_point": f"{__name__}.lift_env_cfg:LiftEnvCfg",
+        "rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_ppo_cfg.yaml",
+        "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:LIFT_RSL_RL_PPO_CFG",
+        "skrl_cfg_entry_point": f"{agents.__name__}:skrl_ppo_cfg.yaml",
+        "sb3_cfg_entry_point": f"{agents.__name__}:sb3_ppo_cfg.yaml",
+        "robomimic_bc_cfg_entry_point": f"{agents.__name__}.robomimic:bc.json",
+        "robomimic_bcq_cfg_entry_point": f"{agents.__name__}.robomimic:bcq.json",
+    },
+)
--- a/source/extensions/omni.isaac.orbit_envs/data/rl_games/reach_ppo.yaml
+++ b/source/extensions/omni.isaac.orbit_envs/data/rl_games/reach_ppo.yaml
--- a/source/extensions/omni.isaac.orbit_envs/data/robomimic/lift_bc.json
+++ b/source/extensions/omni.isaac.orbit_envs/data/robomimic/lift_bc.json
--- a/source/extensions/omni.isaac.orbit_envs/data/robomimic/lift_bcq.json
+++ b/source/extensions/omni.isaac.orbit_envs/data/robomimic/lift_bcq.json
--- a/source/extensions/omni.isaac.orbit_envs/omni/isaac/orbit_envs/manipulation/lift/agents/rsl_rl_ppo_cfg.py
+++ b/source/extensions/omni.isaac.orbit_envs/omni/isaac/orbit_envs/manipulation/lift/agents/rsl_rl_ppo_cfg.py
+# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES, ETH Zurich, and University of Toronto
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+from omni.isaac.orbit_envs.utils.wrappers.rsl_rl import (
+    RslRlOnPolicyRunnerCfg,
+    RslRlPpoActorCriticCfg,
+    RslRlPpoAlgorithmCfg,
+)
+LIFT_RSL_RL_PPO_CFG = RslRlOnPolicyRunnerCfg(
+    num_steps_per_env=96,
+    max_iterations=700,
+    save_interval=50,
+    experiment_name="lift",
+    run_name="",
+    resume=False,
+    load_run=-1,
+    load_checkpoint=-1,
+    empirical_normalization=False,
+    policy=RslRlPpoActorCriticCfg(
+        init_noise_std=1.0,
+        actor_hidden_dims=[256, 128, 64],
+        critic_hidden_dims=[256, 128, 64],
+        activation="elu",
+    ),
+    algorithm=RslRlPpoAlgorithmCfg(
+        value_loss_coef=1.0,
+        use_clipped_value_loss=True,
+        clip_param=0.2,
+        entropy_coef=0.01,
+        num_learning_epochs=5,
+        num_mini_batches=4,
+        learning_rate=1.0e-3,
+        schedule="adaptive",
+        gamma=0.99,
+        lam=0.95,
+        desired_kl=0.01,
+        max_grad_norm=1.0,
+    ),
+)
--- a/source/extensions/omni.isaac.orbit_envs/data/sb3/reach_ppo.yaml
+++ b/source/extensions/omni.isaac.orbit_envs/data/sb3/reach_ppo.yaml
--- a/source/extensions/omni.isaac.orbit_envs/data/skrl/lift_ppo.yaml
+++ b/source/extensions/omni.isaac.orbit_envs/data/skrl/lift_ppo.yaml
--- a/source/extensions/omni.isaac.orbit_envs/omni/isaac/orbit_envs/manipulation/reach/__init__.py
+++ b/source/extensions/omni.isaac.orbit_envs/omni/isaac/orbit_envs/manipulation/reach/__init__.py
@@ -5,6 +5,21 @@
 """Environment for end-effector pose tracking task for fixed-arm robots."""
-from .reach_env import ReachEnv, ReachEnvCfg
+import gym
-__all__ = ["ReachEnv", "ReachEnvCfg"]
+from . import agents
+##
+# Register Gym environments.
+##
+gym.register(
+    id="Isaac-Reach-Franka-v0",
+    entry_point="omni.isaac.orbit.envs.rl_env:RLEnv",
+    kwargs={
+        "env_cfg_entry_point": f"{__name__}.reach_env_cfg:ReachEnvCfg",
+        "rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_ppo_cfg.yaml",
+        "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:LIFT_RSL_RL_PPO_CFG",
+        "skrl_cfg_entry_point": f"{agents.__name__}:skrl_ppo_cfg.yaml",
+    },
+)
--- a/source/extensions/omni.isaac.orbit_envs/data/rl_games/lift_ppo.yaml
+++ b/source/extensions/omni.isaac.orbit_envs/data/rl_games/lift_ppo.yaml
--- a/source/extensions/omni.isaac.orbit_envs/omni/isaac/orbit_envs/manipulation/reach/agents/rsl_rl_ppo_cfg.py
+++ b/source/extensions/omni.isaac.orbit_envs/omni/isaac/orbit_envs/manipulation/reach/agents/rsl_rl_ppo_cfg.py
+# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES, ETH Zurich, and University of Toronto
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+from omni.isaac.orbit_envs.utils.wrappers.rsl_rl import (
+    RslRlOnPolicyRunnerCfg,
+    RslRlPpoActorCriticCfg,
+    RslRlPpoAlgorithmCfg,
+)
+REACH_RSL_RL_PPO_CFG = RslRlOnPolicyRunnerCfg(
+    num_steps_per_env=64,
+    max_iterations=250,
+    save_interval=50,
+    experiment_name="reach",
+    run_name="",
+    resume=False,
+    load_run=-1,
+    load_checkpoint=-1,
+    empirical_normalization=False,
+    policy=RslRlPpoActorCriticCfg(
+        init_noise_std=1.0,
+        actor_hidden_dims=[256, 128, 64],
+        critic_hidden_dims=[256, 128, 64],
+        activation="elu",
+    ),
+    algorithm=RslRlPpoAlgorithmCfg(
+        value_loss_coef=1.0,
+        use_clipped_value_loss=True,
+        clip_param=0.2,
+        entropy_coef=0.01,
+        num_learning_epochs=8,
+        num_mini_batches=64,
+        learning_rate=3.0e-4,
+        schedule="adaptive",
+        gamma=0.99,
+        lam=0.95,
+        desired_kl=0.01,
+        max_grad_norm=1.0,
+    ),
+)
--- a/source/extensions/omni.isaac.orbit_envs/data/skrl/reach_ppo.yaml
+++ b/source/extensions/omni.isaac.orbit_envs/data/skrl/reach_ppo.yaml
--- a/source/extensions/omni.isaac.orbit_envs/omni/isaac/orbit_envs/utils/__init__.py
+++ b/source/extensions/omni.isaac.orbit_envs/omni/isaac/orbit_envs/utils/__init__.py
@@ -3,68 +3,8 @@
 #
 # SPDX-License-Identifier: BSD-3-Clause
-"""Utilities for environments."""
+"""Utilities and wrappers for environments."""
+from .parse_cfg import get_checkpoint_path, load_cfg_from_registry, parse_env_cfg
-import os
+__all__ = ["load_cfg_from_registry", "parse_env_cfg", "get_checkpoint_path"]
-# submodules
-from .parse_cfg import load_default_env_cfg, parse_env_cfg
-__all__ = ["load_default_env_cfg", "parse_env_cfg", "get_checkpoint_path"]
-def get_checkpoint_path(log_path: str, run_dir: str = "*", checkpoint: str = None) -> str:
-    """Get path to the model checkpoint in input directory.
-    The checkpoint file is resolved as: <log_path>/<run_dir>/<checkpoint>.
-    Args:
-        log_path: The log directory path to find models in.
-        run_dir: The name of the directory containing the run. Defaults to the most
-            recent directory created inside :obj:`log_dir`.
-        checkpoint: The model checkpoint file or directory name. Defaults to the most recent
-            recent torch-model saved in the :obj:`run_dir` directory.
-    Raises:
-        ValueError: When no runs are found in the input directory.
-        ValueError: When no checkpoints are found in the input directory.
-    Returns:
-        The path to the model checkpoint.
-    Reference:
-        https://github.com/leggedrobotics/legged_gym/blob/master/legged_gym/utils/helpers.py#L103
-    """
-    # check if runs present in directory
-    try:
-        # find all runs in the directory
-        runs = [os.path.join(log_path, run) for run in os.scandir(log_path)]
-        runs = [run for run in runs if os.path.isdir(run)]
-        # sort by date to handle change of month
-        runs = sorted(runs, key=os.path.getmtime)
-        # create last run file path
-        last_run_path = runs[-1]
-    except IndexError:
-        raise ValueError(f"No runs present in the directory: {log_path}")
-    # path to the directory containing the run
-    if run_dir.startswith("*"):
-        # check if there are other paths. Example: "*/nn"
-        run_path = run_dir.replace("*", last_run_path)
-    else:
-        run_path = os.path.join(log_path, run_dir)
-    # name of model checkpoint
-    if checkpoint is None:
-        # list all model checkpoints in the directory
-        model_checkpoints = [f for f in os.listdir(run_path) if ".pt" in f]
-        # check if any checkpoints are present
-        if len(model_checkpoints) == 0:
-            raise ValueError(f"No checkpoints present in the directory: {run_path}")
-        # sort by date
-        model_checkpoints.sort(key=lambda m: f"{m:0>15}")
-        # get latest model checkpoint file
-        checkpoint_file = model_checkpoints[-1]
-    else:
-        checkpoint_file = checkpoint
-    return os.path.join(run_path, checkpoint_file)
--- a/source/extensions/omni.isaac.orbit_envs/omni/isaac/orbit_envs/utils/parse_cfg.py
+++ b/source/extensions/omni.isaac.orbit_envs/omni/isaac/orbit_envs/utils/parse_cfg.py
@@ -3,7 +3,7 @@
 #
 # SPDX-License-Identifier: BSD-3-Clause
-"""Utilities for parsing and loading environments configurations."""
+"""Utilities for parsing and loading configurations."""
 from __future__ import annotations
@@ -11,35 +11,56 @@ import gym
 import importlib
 import inspect
 import os
+import re
 import yaml
 from typing import Any
 from omni.isaac.orbit.utils import update_class_from_dict, update_dict
-def load_default_env_cfg(task_name: str) -> dict | Any:
+def load_cfg_from_registry(task_name: str, entry_point_key: str) -> dict | Any:
-    """Load default configuration file for an environment from Gym registry.
+    """Load default configuration given its entry point from the gym registry.
-    This function resolves the configuration file for environment based on the file type.
+    This function loads the configuration object from the gym registry for the given task name.
    It supports both YAML and Python configuration files.
+    It expects the configuration to be registered in the gym registry as:
+    .. code-block:: python
+        gym.register(
+            id="My-Awesome-Task-v0",
+            ...
+            kwargs={"env_entry_point_cfg": "path.to.config:ConfigClass"},
+        )
+    Usage:
+        .. code-block:: python
+            from omni.isaac.orbit_envs.utils.parse_cfg import load_cfg_from_registry
+            cfg = load_cfg_from_registry("My-Awesome-Task-v0", "env_entry_point_cfg")
    Args:
        task_name: The name of the environment.
+        entry_point_key: The entry point key to resolve the configuration file.
    Returns:
-        The parsed configuration object.
+        The parsed configuration object. This is either a dictionary or a class object.
    Raises:
-        ValueError: If the task name is not provided, i.e. None.
+        ValueError: If the entry point key is not available in the gym registry for the task.
    """
-    # check if a task name is provided
+    # obtain the configuration entry point
-    if task_name is None:
+    cfg_entry_point = gym.spec(task_name)._kwargs.pop(entry_point_key)
-        raise ValueError("Please provide a valid task name. Hint: Use --task <task_name>.")
+    # check if entry point exists
-    # retrieve the configuration file to load
+    if cfg_entry_point is None:
-    cfg_entry_point: str = gym.spec(task_name)._kwargs.pop("cfg_entry_point")
+        raise ValueError(
+            f"Could not find configuration for the environment: '{task_name}'."
+            f" Please check that the gym registry has the entry point: '{entry_point_key}'."
+        )
    # parse the default config file
-    if cfg_entry_point.endswith(".yaml"):
+    if isinstance(cfg_entry_point, str) and cfg_entry_point.endswith(".yaml"):
        if os.path.exists(cfg_entry_point):
            # absolute path for the config file
            config_file = cfg_entry_point
@@ -50,8 +71,8 @@ def load_default_env_cfg(task_name: str) -> dict | Any:
            # obtain the configuration file path
            config_file = os.path.join(mod_path, file_name)
        # load the configuration
-        print(f"[INFO]: Parsing default environment configuration from: {config_file}")
+        print(f"[INFO]: Parsing configuration from: {config_file}")
-        with open(config_file) as f:
+        with open(config_file, encoding="utf-8") as f:
            cfg = yaml.full_load(f)
    else:
        if callable(cfg_entry_point):
@@ -65,22 +86,24 @@ def load_default_env_cfg(task_name: str) -> dict | Any:
            mod = importlib.import_module(mod_name)
            cfg_cls = getattr(mod, attr_name)
        # load the configuration
-        print(f"[INFO]: Parsing default environment configuration from: {inspect.getfile(cfg_cls)}")
+        print(f"[INFO]: Parsing configuration from: {cfg_entry_point}")
-        cfg = cfg_cls()
+        if callable(cfg_cls):
+            cfg = cfg_cls()
+        else:
+            cfg = cfg_cls
    return cfg
-def parse_env_cfg(task_name: str, use_gpu: bool = True, num_envs: int = None, **kwargs) -> dict | Any:
+def parse_env_cfg(task_name: str, use_gpu: bool | None = None, num_envs: int | None = None) -> dict | Any:
-    """Parse configuration file for an environment and override based on inputs.
+    """Parse configuration for an environment and override based on inputs.
    Args:
        task_name: The name of the environment.
-        use_gpu: Whether to use GPU/CPU pipeline. Defaults to True.
+        use_gpu: Whether to use GPU/CPU pipeline. Defaults to None, in which case it is left unchanged.
-        num_envs: Number of environments to create. Defaults to True.
+        num_envs: Number of environments to create. Defaults to None, in which case it is left unchanged.
    Returns:
-        The parsed configuration object.
+        The parsed configuration object. This is either a dictionary or a class object.
    Raises:
        ValueError: If the task name is not provided, i.e. None.
@@ -91,28 +114,22 @@ def parse_env_cfg(task_name: str, use_gpu: bool = True, num_envs: int = None, **
    # create a dictionary to update from
    args_cfg = {"sim": {"physx": dict()}, "scene": dict()}
    # resolve pipeline to use (based on input)
-    if not use_gpu:
+    if use_gpu is not None:
-        args_cfg["sim"]["use_gpu_pipeline"] = False
+        if not use_gpu:
-        args_cfg["sim"]["physx"]["use_gpu"] = False
+            args_cfg["sim"]["use_gpu_pipeline"] = False
-        args_cfg["sim"]["device"] = "cpu"
+            args_cfg["sim"]["physx"]["use_gpu"] = False
-    else:
+            args_cfg["sim"]["device"] = "cpu"
-        args_cfg["sim"]["use_gpu_pipeline"] = True
+        else:
-        args_cfg["sim"]["physx"]["use_gpu"] = True
+            args_cfg["sim"]["use_gpu_pipeline"] = True
-        args_cfg["sim"]["device"] = "cuda:0"
+            args_cfg["sim"]["physx"]["use_gpu"] = True
-    # FIXME (15.05.2022): if environment name has soft, then GPU pipeline is not supported yet
+            args_cfg["sim"]["device"] = "cuda:0"
-    if "soft" in task_name.lower():
-        args_cfg["sim"]["use_gpu_pipeline"] = False
-        args_cfg["sim"]["physx"]["use_gpu"] = True
-        args_cfg["sim"]["device"] = "cpu"
-        args_cfg["sim"]["use_flatcache"] = False
-        args_cfg["sim"]["use_fabric"] = False
    # number of environments
    if num_envs is not None:
        args_cfg["scene"]["num_envs"] = num_envs
-    # load the configuration
+    # load the default configuration
-    cfg = load_default_env_cfg(task_name)
+    cfg = load_cfg_from_registry(task_name, "env_cfg_entry_point")
    # update the main configuration
    if isinstance(cfg, dict):
        cfg = update_dict(cfg, args_cfg)
@@ -120,3 +137,59 @@ def parse_env_cfg(task_name: str, use_gpu: bool = True, num_envs: int = None, **
        update_class_from_dict(cfg, args_cfg)
    return cfg
+def get_checkpoint_path(
+    log_path: str, run_dir: str = "*", checkpoint: str = "*", sort_alphabetical: bool = True
+) -> str:
+    """Get path to the model checkpoint in input directory.
+    The checkpoint file is resolved as: <log_path>/<run_dir>/<checkpoint>.
+    If run_dir and checkpoint are regex expressions then the most recent (highest alphabetical order) run and checkpoint are selected.
+    Args:
+        log_path: The log directory path to find models in.
+        run_dir: Regex expression for the name of the directory containing the run. Defaults to the most
+            recent directory created inside :obj:`log_dir`.
+        checkpoint: The model checkpoint file or directory name. Defaults to the most recent
+            torch-model saved in the :obj:`run_dir` directory.
+        sort_alphabetical: Whether to sort the runs and checkpoints by alphabetical order. Defaults to True.
+            If False, the checkpoints are sorted by the last modified time.
+    Raises:
+        ValueError: When no runs are found in the input directory.
+        ValueError: When no checkpoints are found in the input directory.
+    Returns:
+        The path to the model checkpoint.
+    Reference:
+        https://github.com/leggedrobotics/legged_gym/blob/master/legged_gym/utils/helpers.py#L103
+    """
+    # check if runs present in directory
+    try:
+        # find all runs in the directory that math the regex expression
+        runs = [
+            os.path.join(log_path, run) for run in os.scandir(log_path) if run.is_dir() and re.match(run_dir, run.name)
+        ]
+        # sort matched runs by alphabetical order (latest run should be last)
+        if sort_alphabetical:
+            runs.sort()
+        else:
+            runs = sorted(runs, key=os.path.getmtime)
+        # create last run file path
+        run_path = runs[-1]
+    except IndexError:
+        raise ValueError(f"No runs present in the directory: '{log_path}' match: '{run_dir}'.")
+    # list all model checkpoints in the directory
+    model_checkpoints = [f for f in os.listdir(run_path) if re.match(checkpoint, f)]
+    # check if any checkpoints are present
+    if len(model_checkpoints) == 0:
+        raise ValueError(f"No checkpoints in the directory: '{run_path}' match '{checkpoint}'.")
+    # sort alphabetically while ensuring that *_10 comes after *_9
+    model_checkpoints.sort(key=lambda m: f"{m:0>15}")
+    # get latest matched checkpoint file
+    checkpoint_file = model_checkpoints[-1]
+    return os.path.join(run_path, checkpoint_file)
--- a/source/extensions/omni.isaac.orbit_envs/omni/isaac/orbit_envs/utils/wrappers/rsl_rl/__init__.py
+++ b/source/extensions/omni.isaac.orbit_envs/omni/isaac/orbit_envs/utils/wrappers/rsl_rl/__init__.py
+# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES, ETH Zurich, and University of Toronto
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+from .exporter import export_policy_as_jit, export_policy_as_onnx
+from .rl_cfg import *
+from .vecenv_wrapper import RslRlVecEnvWrapper
+__all__ = [
+    # wrapper
+    "RslRlVecEnvWrapper",
+    # rl-config
+    "RslRlPpoActorCriticCfg",
+    "RslRlPpoAlgorithmCfg",
+    "RslRlOnPolicyRunnerCfg",
+    # exporters
+    "export_policy_as_jit",
+    "export_policy_as_onnx",
+]
--- a/source/extensions/omni.isaac.orbit_envs/omni/isaac/orbit_envs/utils/wrappers/rsl_rl.py
+++ b/source/extensions/omni.isaac.orbit_envs/omni/isaac/orbit_envs/utils/wrappers/rsl_rl.py
@@ -3,125 +3,11 @@
 #
 # SPDX-License-Identifier: BSD-3-Clause
-"""Wrapper to configure an :class:`RLEnv` instance to RSL-RL vectorized environment.
-The following example shows how to wrap an environment for RSL-RL:
-.. code-block:: python
-    from omni.isaac.orbit_envs.utils.wrappers.rsl_rl import RslRlVecEnvWrapper
-    env = RslRlVecEnvWrapper(env)
-"""
-from __future__ import annotations
 import copy
-import gym
-import gym.spaces
 import os
 import torch
-from typing import Dict, Optional, Tuple
-from omni.isaac.orbit.envs import RLEnv
-__all__ = ["RslRlVecEnvWrapper", "export_policy_as_jit", "export_policy_as_onnx"]
+__all__ = ["export_policy_as_jit", "export_policy_as_onnx"]
-"""
-Vectorized environment wrapper.
-"""
-# VecEnvObs is what is returned by the reset() method
-# it contains the observation for each env
-VecEnvObs = Tuple[torch.Tensor, Optional[torch.Tensor]]
-# VecEnvStepReturn is what is returned by the step() method
-# it contains the observation (actor and critic), reward, done, info for each env
-VecEnvStepReturn = Tuple[VecEnvObs, VecEnvObs, torch.Tensor, torch.Tensor, Dict]
-class RslRlVecEnvWrapper(gym.Wrapper):
-    """Wraps around Isaac Orbit environment for RSL-RL.
-    To use asymmetric actor-critic, the environment instance must have the attributes :attr:`num_states` (int)
-    and :attr:`state_space` (:obj:`gym.spaces.Box`). These are used by the learning agent to allocate buffers in
-    the trajectory memory. Additionally, the method :meth:`_get_observations()` should have the key "critic"
-    which corresponds to the privileged observations. Since this is optional for some environments, the wrapper
-    checks if these attributes exist. If they don't then the wrapper defaults to zero as number of privileged
-    observations.
-    Reference:
-        https://github.com/leggedrobotics/rsl_rl/blob/master/rsl_rl/env/vec_env.py
-    """
-    def __init__(self, env: RLEnv):
-        """Initializes the wrapper.
-        Args:
-            env: The environment to wrap around.
-        Raises:
-            ValueError: When the environment is not an instance of :class:`RLEnv`.
-            ValueError: When the observation space is not a :obj:`gym.spaces.Box`.
-        """
-        # check that input is valid
-        if not isinstance(env.unwrapped, RLEnv):
-            raise ValueError(f"The environment must be inherited from RLEnv. Environment type: {type(env)}")
-        # initialize the wrapper
-        gym.Wrapper.__init__(self, env)
-        # store information required by wrapper
-        orbit_env: RLEnv = self.env.unwrapped
-        self.num_envs = orbit_env.num_envs
-        self.num_actions = orbit_env.action_manager.total_action_dim
-        self.num_obs = orbit_env.observation_manager.group_obs_dim["policy"][0]
-        # reset at the start since the RSL-RL runner does not call reset
-        self.env.reset()
-    """
-    Properties
-    """
-    def get_observations(self) -> torch.Tensor:
-        """Returns the current observations of the environment."""
-        obs_dict = self.env.unwrapped.observation_manager.compute()
-        return obs_dict["policy"], {"observations": obs_dict}
-    @property
-    def episode_length_buf(self) -> torch.Tensor:
-        """The episode length buffer."""
-        return self.env.unwrapped.episode_length_buf
-    @episode_length_buf.setter
-    def episode_length_buf(self, value: torch.Tensor):
-        """Set the episode length buffer.
-        Note: This is needed to perform random initialization of episode lengths in RSL-RL.
-        """
-        self.env.unwrapped.episode_length_buf = value
-    """
-    Operations - MDP
-    """
-    def reset(self) -> VecEnvObs:  # noqa: D102
-        # reset the environment
-        obs_dict = self.env.reset()
-        # return observations
-        return obs_dict["policy"], {"observations": obs_dict}
-    def step(self, actions: torch.Tensor) -> VecEnvStepReturn:  # noqa: D102
-        # record step information
-        obs_dict, rew, dones, extras = self.env.step(actions)
-        # return step information
-        obs = obs_dict["policy"]
-        extras["observations"] = obs_dict
-        return obs, rew, dones.to(torch.long), extras
-"""
-Helper functions.
-"""
 def export_policy_as_jit(actor_critic: object, path: str, filename="policy.pt"):

--- a/source/extensions/omni.isaac.orbit_envs/omni/isaac/orbit_envs/utils/wrappers/rsl_rl/rl_cfg.py
+++ b/source/extensions/omni.isaac.orbit_envs/omni/isaac/orbit_envs/utils/wrappers/rsl_rl/rl_cfg.py
+# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES, ETH Zurich, and University of Toronto
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+from __future__ import annotations
+from dataclasses import MISSING
+from omni.isaac.orbit.utils import configclass
+__all__ = ["RslRlPpoActorCriticCfg", "RslRlPpoAlgorithmCfg", "RslRlOnPolicyRunnerCfg"]
+@configclass
+class RslRlPpoActorCriticCfg:
+    """Configuration for the PPO actor-critic networks."""
+    class_name: str = "ActorCritic"
+    """The policy class name. Default is ActorCritic."""
+    init_noise_std: float = MISSING
+    """The initial noise standard deviation for the policy."""
+    actor_hidden_dims: list[int] = MISSING
+    """The hidden dimensions of the actor network."""
+    critic_hidden_dims: list[int] = MISSING
+    """The hidden dimensions of the critic network."""
+    activation: str = MISSING
+    """The activation function for the actor and critic networks."""
+@configclass
+class RslRlPpoAlgorithmCfg:
+    """Configuration for the PPO algorithm."""
+    class_name: str = "PPO"
+    """The algorithm class name. Default is PPO."""
+    value_loss_coef: float = MISSING
+    """The coefficient for the value loss."""
+    use_clipped_value_loss: bool = MISSING
+    """Whether to use clipped value loss."""
+    clip_param: float = MISSING
+    """The clipping parameter for the policy."""
+    entropy_coef: float = MISSING
+    """The coefficient for the entropy loss."""
+    num_learning_epochs: int = MISSING
+    """The number of learning epochs per update."""
+    num_mini_batches: int = MISSING
+    """The number of mini-batches per update."""
+    learning_rate: float = MISSING
+    """The learning rate for the policy."""
+    schedule: str = MISSING
+    """The learning rate schedule."""
+    gamma: float = MISSING
+    """The discount factor."""
+    lam: float = MISSING
+    """The lambda parameter for Generalized Advantage Estimation (GAE)."""
+    desired_kl: float = MISSING
+    """The desired KL divergence."""
+    max_grad_norm: float = MISSING
+    """The maximum gradient norm."""
+@configclass
+class RslRlOnPolicyRunnerCfg:
+    """Configuration of the runner for on-policy algorithms."""
+    seed: int = 42
+    """The seed for the experiment. Default is 42."""
+    device: str = "cuda"
+    """The device for the rl-agent. Default is cuda."""
+    num_steps_per_env: int = MISSING
+    """The number of steps per environment per update."""
+    max_iterations: int = MISSING
+    """The maximum number of iterations."""
+    empirical_normalization: bool = MISSING
+    """Whether to use empirical normalization."""
+    save_interval: int = MISSING
+    """The number of iterations between saves."""
+    experiment_name: str = MISSING
+    """The experiment name."""
+    run_name: str = ""
+    """The run name. Default is empty string."""
+    resume: bool = False
+    """Whether to resume. Default is False."""
+    load_run: str = ".*"
+    """The run directory to load. Default is ".*" (all).
+    If regex expression, the latest (alphabetical order) matching run will be loaded.
+    """
+    load_checkpoint: str = "model_.*.pt"
+    """The checkpoint file to load. Default is "model_.*.pt" (all).
+    If regex expression, the latest (alphabetical order) matching file will be loaded.
+    """
+    policy: RslRlPpoActorCriticCfg = MISSING
+    """The policy configuration."""
+    algorithm: RslRlPpoAlgorithmCfg = MISSING
+    """The algorithm configuration."""
--- a/source/extensions/omni.isaac.orbit_envs/omni/isaac/orbit_envs/utils/wrappers/rsl_rl/vecenv_wrapper.py
+++ b/source/extensions/omni.isaac.orbit_envs/omni/isaac/orbit_envs/utils/wrappers/rsl_rl/vecenv_wrapper.py
+# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES, ETH Zurich, and University of Toronto
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+"""Wrapper to configure an :class:`RLEnv` instance to RSL-RL vectorized environment.
+The following example shows how to wrap an environment for RSL-RL:
+.. code-block:: python
+    from omni.isaac.orbit_envs.utils.wrappers.rsl_rl import RslRlVecEnvWrapper
+    env = RslRlVecEnvWrapper(env)
+"""
+from __future__ import annotations
+import gym
+import gym.spaces
+import torch
+from omni.isaac.orbit.envs import RLEnv
+class RslRlVecEnvWrapper(gym.Wrapper):
+    """Wraps around Isaac Orbit environment for RSL-RL library
+    To use asymmetric actor-critic, the environment instance must have the attributes :attr:`num_states` (int)
+    and :attr:`state_space` (:obj:`gym.spaces.Box`). These are used by the learning agent to allocate buffers in
+    the trajectory memory. Additionally, the method :meth:`_get_observations()` should have the key "critic"
+    which corresponds to the privileged observations. Since this is optional for some environments, the wrapper
+    checks if these attributes exist. If they don't then the wrapper defaults to zero as number of privileged
+    observations.
+    Reference:
+        https://github.com/leggedrobotics/rsl_rl/blob/master/rsl_rl/env/vec_env.py
+    """
+    def __init__(self, env: RLEnv):
+        """Initializes the wrapper.
+        Args:
+            env: The environment to wrap around.
+        Raises:
+            ValueError: When the environment is not an instance of :class:`RLEnv`.
+        """
+        # check that input is valid
+        if not isinstance(env.unwrapped, RLEnv):
+            raise ValueError(f"The environment must be inherited from RLEnv. Environment type: {type(env)}")
+        # initialize the wrapper
+        gym.Wrapper.__init__(self, env)
+        # store information required by wrapper
+        orbit_env: RLEnv = self.env.unwrapped
+        self.num_envs = orbit_env.num_envs
+        self.num_actions = orbit_env.action_manager.total_action_dim
+        self.num_obs = orbit_env.observation_manager.group_obs_dim["policy"][0]
+        # reset at the start since the RSL-RL runner does not call reset
+        self.env.reset()
+    """
+    Properties
+    """
+    def get_observations(self) -> torch.Tensor:
+        """Returns the current observations of the environment."""
+        obs_dict = self.env.unwrapped.observation_manager.compute()
+        return obs_dict["policy"], {"observations": obs_dict}
+    @property
+    def episode_length_buf(self) -> torch.Tensor:
+        """The episode length buffer."""
+        return self.env.unwrapped.episode_length_buf
+    @episode_length_buf.setter
+    def episode_length_buf(self, value: torch.Tensor):
+        """Set the episode length buffer.
+        Note: This is needed to perform random initialization of episode lengths in RSL-RL.
+        """
+        self.env.unwrapped.episode_length_buf = value
+    """
+    Operations - MDP
+    """
+    def reset(self) -> tuple[torch.Tensor, dict]:
+        # reset the environment
+        obs_dict = self.env.reset()
+        # return observations
+        return obs_dict["policy"], {"observations": obs_dict}
+    def step(self, actions: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, dict]:
+        # record step information
+        obs_dict, rew, dones, extras = self.env.step(actions)
+        # return step information
+        obs = obs_dict["policy"]
+        extras["observations"] = obs_dict
+        return obs, rew, dones, extras
--- a/source/extensions/omni.isaac.orbit_envs/omni/isaac/orbit_envs/utils/wrappers/sb3.py
+++ b/source/extensions/omni.isaac.orbit_envs/omni/isaac/orbit_envs/utils/wrappers/sb3.py
@@ -15,18 +15,49 @@ The following example shows how to wrap an environment for Stable-Baselines3:
 """
+from __future__ import annotations
 import gym
 import numpy as np
 import torch
-from typing import Any, Dict, List
+from typing import Any
-# stable-baselines3
 from stable_baselines3.common.vec_env.base_vec_env import VecEnv, VecEnvObs, VecEnvStepReturn
 from omni.isaac.orbit.envs import RLEnv
-__all__ = ["Sb3VecEnvWrapper"]
+__all__ = ["process_sb3_cfg", "Sb3VecEnvWrapper"]
+"""
+Configuration Parser.
+"""
+def process_sb3_cfg(cfg: dict) -> dict:
+    """Convert simple YAML types to Stable-Baselines classes/components.
+    Args:
+        cfg: A configuration dictionary.
+    Returns:
+        A dictionary containing the converted configuration.
+    Reference:
+        https://github.com/DLR-RM/rl-baselines3-zoo/blob/0e5eb145faefa33e7d79c7f8c179788574b20da5/utils/exp_manager.py#L358
+    """
+    _direct_eval = ["policy_kwargs", "replay_buffer_class", "replay_buffer_kwargs"]
+    def update_dict(d):
+        for key, value in d.items():
+            if isinstance(value, dict):
+                update_dict(value)
+            else:
+                if key in _direct_eval:
+                    d[key] = eval(value)
+        return d
+    # parse agent configuration and convert to classes
+    return update_dict(cfg)
 """
@@ -90,11 +121,11 @@ class Sb3VecEnvWrapper(gym.Wrapper, VecEnv):
    Properties
    """
-    def get_episode_rewards(self) -> List[float]:
+    def get_episode_rewards(self) -> list[float]:
        """Returns the rewards of all the episodes."""
        return self._ep_rew_buf.cpu().tolist()
-    def get_episode_lengths(self) -> List[int]:
+    def get_episode_lengths(self) -> list[int]:
        """Returns the number of time-steps of all the episodes."""
        return self._ep_len_buf.cpu().tolist()
@@ -180,10 +211,10 @@ class Sb3VecEnvWrapper(gym.Wrapper, VecEnv):
            raise NotImplementedError(f"Unsupported backend for simulation: {self.env.sim.backend}")
        return obs
-    def _process_extras(self, obs, dones, extras, reset_ids) -> List[Dict[str, Any]]:
+    def _process_extras(self, obs, dones, extras, reset_ids) -> list[dict[str, Any]]:
        """Convert miscellaneous information into dictionary for each sub-environment."""
        # create empty list of dictionaries to fill
-        infos: List[Dict[str, Any]] = [dict.fromkeys(extras.keys()) for _ in range(self.env.num_envs)]
+        infos: list[dict[str, Any]] = [dict.fromkeys(extras.keys()) for _ in range(self.env.num_envs)]
        # fill-in information for each sub-environment
        # Note: This loop becomes slow when number of environments is large.
        for idx in range(self.env.num_envs):

--- a/source/extensions/omni.isaac.orbit_envs/omni/isaac/orbit_envs/utils/wrappers/skrl.py
+++ b/source/extensions/omni.isaac.orbit_envs/omni/isaac/orbit_envs/utils/wrappers/skrl.py
@@ -29,16 +29,63 @@ import copy
 import torch
 import tqdm
-# skrl
 from skrl.agents.torch import Agent
 from skrl.envs.torch.wrappers import Wrapper, wrap_env
+from skrl.resources.preprocessors.torch import RunningStandardScaler  # noqa: F401
+from skrl.resources.schedulers.torch import KLAdaptiveRL  # noqa: F401
 from skrl.trainers.torch import Trainer
 from skrl.trainers.torch.sequential import SEQUENTIAL_TRAINER_DEFAULT_CONFIG
+from skrl.utils.model_instantiators import Shape  # noqa: F401
 from omni.isaac.orbit.envs import RLEnv
 __all__ = ["SkrlVecEnvWrapper", "SkrlSequentialLogTrainer"]
+"""
+Configuration Parser.
+"""
+def process_skrl_cfg(cfg: dict) -> dict:
+    """Convert simple YAML types to skrl classes/components.
+    Args:
+        cfg: A configuration dictionary.
+    Returns:
+        A dictionary containing the converted configuration.
+    """
+    _direct_eval = [
+        "learning_rate_scheduler",
+        "state_preprocessor",
+        "value_preprocessor",
+        "input_shape",
+        "output_shape",
+    ]
+    def reward_shaper_function(scale):
+        def reward_shaper(rewards, timestep, timesteps):
+            return rewards * scale
+        return reward_shaper
+    def update_dict(d):
+        for key, value in d.items():
+            if isinstance(value, dict):
+                update_dict(value)
+            else:
+                if key in _direct_eval:
+                    d[key] = eval(value)
+                elif key.endswith("_kwargs"):
+                    d[key] = value if value is not None else {}
+                elif key in ["rewards_shaper_scale"]:
+                    d["rewards_shaper"] = reward_shaper_function(value)
+        return d
+    # parse agent configuration and convert to classes
+    return update_dict(cfg)
 """
 Vectorized environment wrapper.

--- a/source/standalone/environments/list_envs.py
+++ b/source/standalone/environments/list_envs.py
@@ -50,7 +50,7 @@ def main():
    for task_spec in gym.envs.registry.all():
        if "Isaac" in task_spec.id:
            # add details to table
-            table.add_row([index + 1, task_spec.id, task_spec.entry_point, task_spec._kwargs["cfg_entry_point"]])
+            table.add_row([index + 1, task_spec.id, task_spec.entry_point, task_spec._kwargs["env_cfg_entry_point"]])
            # increment count
            index += 1

--- a/source/standalone/workflows/rl_games/config.py
+++ b/source/standalone/workflows/rl_games/config.py
-# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES, ETH Zurich, and University of Toronto
-# All rights reserved.
-#
-# SPDX-License-Identifier: BSD-3-Clause
-"""Utility functions for parsing rl-games configuration files."""
-from __future__ import annotations
-import os
-import yaml
-from omni.isaac.orbit_envs import ORBIT_ENVS_DATA_DIR
-__all__ = ["RLG_PPO_CONFIG_FILE", "parse_rlg_cfg"]
-RLG_PPO_CONFIG_FILE = {
-    # classic
-    "Isaac-Cartpole-v0": os.path.join(ORBIT_ENVS_DATA_DIR, "rl_games/cartpole_ppo.yaml"),
-    "Isaac-Ant-v0": os.path.join(ORBIT_ENVS_DATA_DIR, "rl_games/ant_ppo.yaml"),
-    "Isaac-Humanoid-v0": os.path.join(ORBIT_ENVS_DATA_DIR, "rl_games/humanoid_ppo.yaml"),
-    # manipulation
-    "Isaac-Lift-Franka-v0": os.path.join(ORBIT_ENVS_DATA_DIR, "rl_games/lift_ppo.yaml"),
-    "Isaac-Reach-Franka-v0": os.path.join(ORBIT_ENVS_DATA_DIR, "rl_games/reach_ppo.yaml"),
-}
-"""Mapping from environment names to PPO agent files."""
-def parse_rlg_cfg(task_name) -> dict:
-    """Parse configuration based on command line arguments.
-    Args:
-        task_name: The name of the environment.
-    Returns:
-        A dictionary containing the parsed configuration.
-    """
-    # retrieve the default environment config file
-    try:
-        config_file = RLG_PPO_CONFIG_FILE[task_name]
-    except KeyError:
-        raise ValueError(f"Task not found: {task_name}")
-    # parse agent configuration
-    with open(config_file, encoding="utf-8") as f:
-        cfg = yaml.load(f, Loader=yaml.Loader)
-    return cfg
--- a/source/standalone/workflows/rl_games/play.py
+++ b/source/standalone/workflows/rl_games/play.py
@@ -50,17 +50,15 @@ from rl_games.torch_runner import Runner
 import omni.isaac.contrib_envs  # noqa: F401
 import omni.isaac.orbit_envs  # noqa: F401
-from omni.isaac.orbit_envs.utils import get_checkpoint_path, parse_env_cfg
+from omni.isaac.orbit_envs.utils import get_checkpoint_path, load_cfg_from_registry, parse_env_cfg
 from omni.isaac.orbit_envs.utils.wrappers.rl_games import RlGamesGpuEnv, RlGamesVecEnvWrapper
-from config import parse_rlg_cfg
 def main():
    """Play with RL-Games agent."""
    # parse env configuration
    env_cfg = parse_env_cfg(args_cli.task, use_gpu=not args_cli.cpu, num_envs=args_cli.num_envs)
-    agent_cfg = parse_rlg_cfg(args_cli.task)
+    agent_cfg = load_cfg_from_registry(args_cli.task, "rl_games_cfg_entry_point")
    # wrap around environment for rl-games
    rl_device = agent_cfg["params"]["config"]["device"]

--- a/source/standalone/workflows/rl_games/train.py
+++ b/source/standalone/workflows/rl_games/train.py
@@ -57,11 +57,9 @@ from omni.isaac.orbit.utils.io import dump_pickle, dump_yaml
 import omni.isaac.contrib_envs  # noqa: F401
 import omni.isaac.orbit_envs  # noqa: F401
-from omni.isaac.orbit_envs.utils import parse_env_cfg
+from omni.isaac.orbit_envs.utils import load_cfg_from_registry, parse_env_cfg
 from omni.isaac.orbit_envs.utils.wrappers.rl_games import RlGamesGpuEnv, RlGamesVecEnvWrapper
-from config import parse_rlg_cfg
 def main():
    """Train with RL-Games agent."""
@@ -70,7 +68,7 @@ def main():
    # parse configuration
    env_cfg = parse_env_cfg(args_cli.task, use_gpu=not args_cli.cpu, num_envs=args_cli.num_envs)
-    agent_cfg = parse_rlg_cfg(args_cli.task)
+    agent_cfg = load_cfg_from_registry(args_cli.task, "rl_games_cfg_entry_point")
    # override from command line
    if args_cli_seed is not None:
        agent_cfg["params"]["seed"] = args_cli_seed
@@ -80,7 +78,7 @@ def main():
    log_root_path = os.path.abspath(log_root_path)
    print(f"[INFO] Logging experiment in directory: {log_root_path}")
    # specify directory for logging runs
-    log_dir = agent_cfg["params"]["config"].get("full_experiment_name", datetime.now().strftime("%b%d_%H-%M-%S"))
+    log_dir = agent_cfg["params"]["config"].get("full_experiment_name", datetime.now().strftime("%Y-%m-%d_%H-%M-%S"))
    # set directory into agent config
    # logging directory path: <train_dir>/<full_experiment_name>
    agent_cfg["params"]["config"]["train_dir"] = log_root_path

--- a/source/standalone/workflows/robomimic/config.py
+++ b/source/standalone/workflows/robomimic/config.py
-# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES, ETH Zurich, and University of Toronto
-# All rights reserved.
-#
-# SPDX-License-Identifier: BSD-3-Clause
-"""Utility functions for parsing robomimic configuration files."""
-from __future__ import annotations
-import os
-from omni.isaac.orbit_envs import ORBIT_ENVS_DATA_DIR
-ROBOMIMIC_CONFIG_FILES_DICT = {
-    "Isaac-Lift-Franka-v0": {
-        "bc": os.path.join(ORBIT_ENVS_DATA_DIR, "robomimic/lift_bc.json"),
-        "bcq": os.path.join(ORBIT_ENVS_DATA_DIR, "robomimic/lift_bcq.json"),
-    }
-}
-"""Mapping from environment names to imitation learning config files."""
-__all__ = ["ROBOMIMIC_CONFIG_FILES_DICT"]
--- a/source/standalone/workflows/robomimic/train.py
+++ b/source/standalone/workflows/robomimic/train.py
@@ -34,11 +34,27 @@ Args:
    task: name of the environment.
    name: if provided, override the experiment name defined in the config
    dataset: if provided, override the dataset path defined in the config
+This file has been modified from the original version in the following ways:
+* Added import of AppLauncher from omni.isaac.orbit.app to resolve the configuration to load for training.
 """
 from __future__ import annotations
+"""Launch Isaac Sim Simulator first."""
+from omni.isaac.orbit.app import AppLauncher
+# launch omniverse app
+app_launcher = AppLauncher(headless=True)
+simulation_app = app_launcher.app
+"""Rest everything follows."""
 import argparse
+import gym
 import json
 import numpy as np
 import os
@@ -59,8 +75,6 @@ from robomimic.algo import RolloutPolicy, algo_factory
 from robomimic.config import config_factory
 from robomimic.utils.log_utils import DataLogger, PrintLogger
-from config import ROBOMIMIC_CONFIG_FILES_DICT
 def train(config, device):
    """Train a model using the algorithm."""
@@ -331,8 +345,17 @@ def main(args):
    """Train a model on a task using a specified algorithm."""
    # load config
    if args.task is not None:
+        # obtain the configuration entry point
+        cfg_entry_point_key = f"robomimic_{args.algo}_cfg_entry_point"
+        cfg_entry_point_file = gym.spec(args.task)._kwargs.pop(cfg_entry_point_key)
+        # check if entry point exists
+        if cfg_entry_point_file is None:
+            raise ValueError(
+                f"Could not find configuration for the environment: '{args.task}'."
+                f" Please check that the gym registry has the entry point: '{cfg_entry_point_key}'."
+            )
        # load config from json file
-        with open(ROBOMIMIC_CONFIG_FILES_DICT[args.task][args.algo]) as f:
+        with open(cfg_entry_point_file) as f:
            ext_cfg = json.load(f)
            config = config_factory(ext_cfg["algo_name"])
        # update config with external json - this will throw errors if
@@ -387,4 +410,12 @@ if __name__ == "__main__":
    parser.add_argument("--algo", type=str, default=None, help="Name of the algorithm.")
    args = parser.parse_args()
-    main(args)
+    try:
+        # run training
+        main(args)
+    except Exception:
+        raise
+    finally:
+        # close sim app
+        simulation_app.close()
--- a/source/standalone/workflows/rsl_rl/cli_args.py
+++ b/source/standalone/workflows/rsl_rl/cli_args.py
+from __future__ import annotations
+import argparse
+from typing import TYPE_CHECKING
+if TYPE_CHECKING:
+    from omni.isaac.orbit_envs.utils.wrappers.rsl_rl import RslRlOnPolicyRunnerCfg
+def add_rsl_rl_args(parser: argparse.ArgumentParser):
+    """Add RSL-RL arguments to the parser.
+    Args:
+        parser: The parser to add the arguments to.
+    """
+    # create a new argument group
+    arg_group = parser.add_argument_group("rsl_rl", description="Arguments for RSL-RL agent.")
+    arg_group.add_argument(
+        "--experiment_name", type=str, default=None, help="Name of the experiment folder where logs will be stored."
+    )
+    arg_group.add_argument("--resume", type=bool, default=None, help="Whether to resume from a checkpoint.")
+    arg_group.add_argument("--load_run", type=str, default=None, help="Name of the run folder to resume from.")
+    arg_group.add_argument("--checkpoint", type=str, default=None, help="Checkpoint file to resume from.")
+def parse_rsl_rl_cfg(task_name: str, args_cli: argparse.Namespace) -> RslRlOnPolicyRunnerCfg:
+    """Parse configuration for RSL-RL agent based on inputs.
+    Args:
+        task_name: The name of the environment.
+        args_cli: The command line arguments.
+    Returns:
+        The parsed configuration for RSL-RL agent based on inputs.
+    """
+    from omni.isaac.orbit_envs.utils.parse_cfg import load_cfg_from_registry
+    # load the default configuration
+    rslrl_cfg: RslRlOnPolicyRunnerCfg = load_cfg_from_registry(task_name, "rsl_rl_cfg_entry_point")
+    # override the default configuration with CLI arguments
+    if args_cli.seed is not None:
+        rslrl_cfg.seed = args_cli.seed
+    if args_cli.resume is not None:
+        rslrl_cfg.resume = args_cli.resume
+    if args_cli.load_run is not None:
+        rslrl_cfg.load_run = args_cli.load_run
+    if args_cli.checkpoint is not None:
+        rslrl_cfg.load_checkpoint = args_cli.checkpoint
+    return rslrl_cfg
--- a/source/standalone/workflows/rsl_rl/config.py
+++ b/source/standalone/workflows/rsl_rl/config.py
-# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES, ETH Zurich, and University of Toronto
-# All rights reserved.
-#
-# SPDX-License-Identifier: BSD-3-Clause
-"""Utility functions for parsing rsl-rl configuration files."""
-from __future__ import annotations
-import os
-import yaml
-from omni.isaac.orbit_envs import ORBIT_ENVS_DATA_DIR
-__all__ = ["RSLRL_PPO_CONFIG_FILE", "parse_rslrl_cfg"]
-RSLRL_PPO_CONFIG_FILE = {
-    # classic
-    "Isaac-Cartpole-v0": os.path.join(ORBIT_ENVS_DATA_DIR, "rsl_rl/cartpole_ppo.yaml"),
-    # manipulation
-    "Isaac-Lift-Franka-v0": os.path.join(ORBIT_ENVS_DATA_DIR, "rsl_rl/lift_ppo.yaml"),
-    "Isaac-Reach-Franka-v0": os.path.join(ORBIT_ENVS_DATA_DIR, "rsl_rl/reach_ppo.yaml"),
-    # locomotion
-    "Isaac-Velocity-Rough-Anymal-C-v0": os.path.join(ORBIT_ENVS_DATA_DIR, "rsl_rl/anymal_ppo.yaml"),
-    "Isaac-Velocity-Rough-Anymal-C-Play-v0": os.path.join(ORBIT_ENVS_DATA_DIR, "rsl_rl/anymal_ppo.yaml"),
-}
-"""Mapping from environment names to PPO agent files."""
-def parse_rslrl_cfg(task_name) -> dict:
-    """Parse configuration for RSL-RL agent based on inputs.
-    Args:
-        task_name: The name of the environment.
-    Returns:
-        A dictionary containing the parsed configuration.
-    """
-    # retrieve the default environment config file
-    try:
-        config_file = RSLRL_PPO_CONFIG_FILE[task_name]
-    except KeyError:
-        raise ValueError(f"Task not found: {task_name}")
-    # parse agent configuration
-    with open(config_file, encoding="utf-8") as f:
-        cfg = yaml.load(f, Loader=yaml.FullLoader)
-    return cfg
--- a/source/standalone/workflows/rsl_rl/play.py
+++ b/source/standalone/workflows/rsl_rl/play.py
@@ -14,15 +14,19 @@ import argparse
 from omni.isaac.orbit.app import AppLauncher
+# local imports
+import source.standalone.workflows.rsl_rl.cli_args as cli_args  # isort: skip
 # add argparse arguments
-parser = argparse.ArgumentParser(description="Play a checkpoint of an RL agent from RSL-RL.")
+parser = argparse.ArgumentParser(description="Train an RL agent with RSL-RL.")
 parser.add_argument("--cpu", action="store_true", default=False, help="Use CPU pipeline.")
 parser.add_argument("--num_envs", type=int, default=None, help="Number of environments to simulate.")
 parser.add_argument("--task", type=str, default=None, help="Name of the task.")
-parser.add_argument("--checkpoint", type=str, default=None, help="Path to model checkpoint.")
+parser.add_argument("--seed", type=int, default=None, help="Seed used for the environment")
+# append RSL-RL cli arguments
+cli_args.add_rsl_rl_args(parser)
 # append AppLauncher cli args
 AppLauncher.add_app_launcher_args(parser)
-# parse the arguments
 args_cli = parser.parse_args()
 # launch omniverse app
@@ -43,16 +47,18 @@ from rsl_rl.runners import OnPolicyRunner
 import omni.isaac.contrib_envs  # noqa: F401
 import omni.isaac.orbit_envs  # noqa: F401
 from omni.isaac.orbit_envs.utils import get_checkpoint_path, parse_env_cfg
-from omni.isaac.orbit_envs.utils.wrappers.rsl_rl import RslRlVecEnvWrapper, export_policy_as_onnx
+from omni.isaac.orbit_envs.utils.wrappers.rsl_rl import (
+    RslRlOnPolicyRunnerCfg,
-from config import parse_rslrl_cfg
+    RslRlVecEnvWrapper,
+    export_policy_as_onnx,
+)
 def main():
    """Play with RSL-RL agent."""
    # parse configuration
    env_cfg = parse_env_cfg(args_cli.task, use_gpu=not args_cli.cpu, num_envs=args_cli.num_envs)
-    agent_cfg = parse_rslrl_cfg(args_cli.task)
+    agent_cfg: RslRlOnPolicyRunnerCfg = cli_args.parse_rsl_rl_cfg(args_cli.task, args_cli)
    # create isaac environment
    env = gym.make(args_cli.task, cfg=env_cfg)
@@ -60,29 +66,17 @@ def main():
    env = RslRlVecEnvWrapper(env)
    # specify directory for logging experiments
-    log_root_path = os.path.join("logs", "rsl_rl", agent_cfg["runner"]["experiment_name"])
+    log_root_path = os.path.join("logs", "rsl_rl", agent_cfg.experiment_name)
    log_root_path = os.path.abspath(log_root_path)
    print(f"[INFO] Loading experiment from directory: {log_root_path}")
-    # get path to previous checkpoint
+    resume_path = get_checkpoint_path(log_root_path, agent_cfg.load_run, agent_cfg.load_checkpoint)
-    if args_cli.checkpoint is None:
-        # specify directory for logging runs
-        run_dir = "*"
-        if agent_cfg["runner"]["load_run"] != -1:
-            run_dir = agent_cfg["runner"]["load_run"]
-        # specify name of checkpoint
-        checkpoint_file = None
-        if agent_cfg["runner"]["checkpoint"] != -1:
-            checkpoint_file = f"model_{agent_cfg['runner']['checkpoint']}.pt"
-        # get path to previous checkpoint
-        resume_path = get_checkpoint_path(log_root_path, run_dir, checkpoint_file)
-    else:
-        resume_path = os.path.abspath(args_cli.checkpoint)
-    # load previously trained model
    print(f"[INFO]: Loading model checkpoint from: {resume_path}")
-    # create runner from rsl-rl
+    # load previously trained model
-    ppo_runner = OnPolicyRunner(env, agent_cfg, log_dir=None, device=agent_cfg["device"])
+    ppo_runner = OnPolicyRunner(env, agent_cfg.to_dict(), log_dir=None, device=agent_cfg.device)
    ppo_runner.load(resume_path)
+    print(f"[INFO]: Loading model checkpoint from: {resume_path}")
    # obtain the trained policy for inference
    policy = ppo_runner.get_inference_policy(device=env.unwrapped.device)

--- a/source/standalone/workflows/rsl_rl/train.py
+++ b/source/standalone/workflows/rsl_rl/train.py
@@ -15,6 +15,10 @@ import os
 from omni.isaac.orbit.app import AppLauncher
+# local imports
+import source.standalone.workflows.rsl_rl.cli_args as cli_args  # isort: skip
 # add argparse arguments
 parser = argparse.ArgumentParser(description="Train an RL agent with RSL-RL.")
 parser.add_argument("--video", action="store_true", default=False, help="Record videos during training.")
@@ -24,9 +28,10 @@ parser.add_argument("--cpu", action="store_true", default=False, help="Use CPU p
 parser.add_argument("--num_envs", type=int, default=None, help="Number of environments to simulate.")
 parser.add_argument("--task", type=str, default=None, help="Name of the task.")
 parser.add_argument("--seed", type=int, default=None, help="Seed used for the environment")
+# append RSL-RL cli arguments
+cli_args.add_rsl_rl_args(parser)
 # append AppLauncher cli args
 AppLauncher.add_app_launcher_args(parser)
-# parse the arguments
 args_cli = parser.parse_args()
 # load cheaper kit config in headless
@@ -51,15 +56,14 @@ from datetime import datetime
 import carb
 from rsl_rl.runners import OnPolicyRunner
+from omni.isaac.orbit.envs.rl_env_cfg import RLEnvCfg
 from omni.isaac.orbit.utils.dict import print_dict
 from omni.isaac.orbit.utils.io import dump_pickle, dump_yaml
 import omni.isaac.contrib_envs  # noqa: F401
 import omni.isaac.orbit_envs  # noqa: F401
 from omni.isaac.orbit_envs.utils import get_checkpoint_path, parse_env_cfg
-from omni.isaac.orbit_envs.utils.wrappers.rsl_rl import RslRlVecEnvWrapper
+from omni.isaac.orbit_envs.utils.wrappers.rsl_rl import RslRlOnPolicyRunnerCfg, RslRlVecEnvWrapper
-from config import parse_rslrl_cfg
 torch.backends.cuda.matmul.allow_tf32 = True
 torch.backends.cudnn.allow_tf32 = True
@@ -70,28 +74,19 @@ torch.backends.cudnn.benchmark = False
 def main():
    """Train with RSL-RL agent."""
    # parse configuration
-    env_cfg = parse_env_cfg(args_cli.task, use_gpu=not args_cli.cpu, num_envs=args_cli.num_envs)
+    env_cfg: RLEnvCfg = parse_env_cfg(args_cli.task, use_gpu=not args_cli.cpu, num_envs=args_cli.num_envs)
-    agent_cfg = parse_rslrl_cfg(args_cli.task)
+    agent_cfg: RslRlOnPolicyRunnerCfg = cli_args.parse_rsl_rl_cfg(args_cli.task, args_cli)
-    # override config from command line
-    if args_cli.seed is not None:
-        agent_cfg["seed"] = args_cli.seed
    # specify directory for logging experiments
-    log_root_path = os.path.join("logs", "rsl_rl", agent_cfg["runner"]["experiment_name"])
+    log_root_path = os.path.join("logs", "rsl_rl", agent_cfg.experiment_name)
    log_root_path = os.path.abspath(log_root_path)
    print(f"[INFO] Logging experiment in directory: {log_root_path}")
    # specify directory for logging runs
-    log_dir = datetime.now().strftime("%b%d_%H-%M-%S")
+    log_dir = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
-    if agent_cfg["runner"]["run_name"]:
+    if agent_cfg.run_name:
-        log_dir += f"_{agent_cfg['runner']['run_name']}"
+        log_dir += f"_{agent_cfg.run_name}"
    log_dir = os.path.join(log_root_path, log_dir)
-    # dump the configuration into log-directory
-    dump_yaml(os.path.join(log_dir, "params", "env.yaml"), env_cfg)
-    dump_yaml(os.path.join(log_dir, "params", "agent.yaml"), agent_cfg)
-    dump_pickle(os.path.join(log_dir, "params", "env.pkl"), env_cfg)
-    dump_pickle(os.path.join(log_dir, "params", "agent.pkl"), agent_cfg)
    # create isaac environment
    env = gym.make(args_cli.task, cfg=env_cfg)
    # wrap for video recording
@@ -108,27 +103,26 @@ def main():
    env = RslRlVecEnvWrapper(env)
    # create runner from rsl-rl
-    runner = OnPolicyRunner(env, agent_cfg, log_dir=log_dir, device=agent_cfg["device"])
+    runner = OnPolicyRunner(env, agent_cfg.to_dict(), log_dir=log_dir, device=agent_cfg.device)
    # save resume path before creating a new log_dir
-    if agent_cfg["runner"]["resume"]:
+    if agent_cfg.resume:
-        # specify directory for logging runs
-        run_dir = "*"
-        if agent_cfg["runner"]["load_run"] != -1:
-            run_dir = agent_cfg["runner"]["load_run"]
-        # specify name of checkpoint
-        checkpoint_file = None
-        if agent_cfg["runner"]["checkpoint"] != -1:
-            checkpoint_file = f"model_{agent_cfg['runner']['checkpoint']}.pt"
        # get path to previous checkpoint
-        resume_path = get_checkpoint_path(log_root_path, run_dir, checkpoint_file)
+        resume_path = get_checkpoint_path(log_root_path, agent_cfg.load_run, agent_cfg.load_checkpoint)
        print(f"[INFO]: Loading model checkpoint from: {resume_path}")
        # load previously trained model
        runner.load(resume_path)
    # set seed of the environment
-    env.seed(agent_cfg["seed"])
+    env.seed(agent_cfg.seed)
+    # dump the configuration into log-directory
+    dump_yaml(os.path.join(log_dir, "params", "env.yaml"), env_cfg)
+    dump_yaml(os.path.join(log_dir, "params", "agent.yaml"), agent_cfg)
+    dump_pickle(os.path.join(log_dir, "params", "env.pkl"), env_cfg)
+    dump_pickle(os.path.join(log_dir, "params", "agent.pkl"), agent_cfg)
    # run training
-    runner.learn(num_learning_iterations=agent_cfg["runner"]["max_iterations"], init_at_random_ep_len=True)
+    runner.learn(num_learning_iterations=agent_cfg.max_iterations, init_at_random_ep_len=True)
    # close the simulator
    env.close()

--- a/source/standalone/workflows/sb3/config.py
+++ b/source/standalone/workflows/sb3/config.py
-# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES, ETH Zurich, and University of Toronto
-# All rights reserved.
-#
-# SPDX-License-Identifier: BSD-3-Clause
-"""Utility functions for parsing sb3 configuration files."""
-from __future__ import annotations
-import os
-import yaml
-from torch import nn as nn  # noqa: F401
-from omni.isaac.orbit_envs import ORBIT_ENVS_DATA_DIR
-__all__ = ["SB3_PPO_CONFIG_FILE", "parse_sb3_cfg"]
-SB3_PPO_CONFIG_FILE = {
-    # classic
-    "Isaac-Cartpole-v0": os.path.join(ORBIT_ENVS_DATA_DIR, "sb3/cartpole_ppo.yaml"),
-    "Isaac-Ant-v0": os.path.join(ORBIT_ENVS_DATA_DIR, "sb3/ant_ppo.yaml"),
-    "Isaac-Humanoid-v0": os.path.join(ORBIT_ENVS_DATA_DIR, "sb3/humanoid_ppo.yaml"),
-    # manipulation
-    "Isaac-Reach-Franka-v0": os.path.join(ORBIT_ENVS_DATA_DIR, "sb3/reach_ppo.yaml"),
-}
-"""Mapping from environment names to PPO agent files."""
-def parse_sb3_cfg(task_name) -> dict:
-    """Parse configuration for Stable-baselines3 agent based on inputs.
-    Args:
-        task_name: The name of the environment.
-    Returns:
-        A dictionary containing the parsed configuration.
-    """
-    # retrieve the default environment config file
-    try:
-        config_file = SB3_PPO_CONFIG_FILE[task_name]
-    except KeyError:
-        raise ValueError(f"Task not found: {task_name}. Configurations exist for {SB3_PPO_CONFIG_FILE.keys()}")
-    # parse agent configuration
-    with open(config_file, encoding="utf-8") as f:
-        cfg = yaml.load(f, Loader=yaml.FullLoader)
-    # check config is valid
-    if cfg is None:
-        raise ValueError(f"Config file is empty: {config_file}")
-    # post-process certain arguments
-    # reference: https://github.com/DLR-RM/rl-baselines3-zoo/blob/0e5eb145faefa33e7d79c7f8c179788574b20da5/utils/exp_manager.py#L358
-    for kwargs_key in ["policy_kwargs", "replay_buffer_class", "replay_buffer_kwargs"]:
-        if kwargs_key in cfg and isinstance(cfg[kwargs_key], str):
-            cfg[kwargs_key] = eval(cfg[kwargs_key])
-    return cfg
--- a/source/standalone/workflows/sb3/play.py
+++ b/source/standalone/workflows/sb3/play.py
@@ -43,22 +43,22 @@ from stable_baselines3.common.vec_env import VecNormalize
 import omni.isaac.contrib_envs  # noqa: F401
 import omni.isaac.orbit_envs  # noqa: F401
-from omni.isaac.orbit_envs.utils.parse_cfg import parse_env_cfg
+from omni.isaac.orbit_envs.utils.parse_cfg import load_cfg_from_registry, parse_env_cfg
-from omni.isaac.orbit_envs.utils.wrappers.sb3 import Sb3VecEnvWrapper
+from omni.isaac.orbit_envs.utils.wrappers.sb3 import Sb3VecEnvWrapper, process_sb3_cfg
-from config import parse_sb3_cfg
 def main():
    """Play with stable-baselines agent."""
    # parse configuration
    env_cfg = parse_env_cfg(args_cli.task, use_gpu=not args_cli.cpu, num_envs=args_cli.num_envs)
+    agent_cfg = load_cfg_from_registry(args_cli.task, "sb3_cfg_entry_point")
+    # post-process agent configuration
+    agent_cfg = process_sb3_cfg(agent_cfg)
    # create isaac environment
    env = gym.make(args_cli.task, cfg=env_cfg)
    # wrap around environment for stable baselines
    env = Sb3VecEnvWrapper(env)
-    # parse agent configuration
-    agent_cfg = parse_sb3_cfg(args_cli.task)
    # normalize environment (if needed)
    if "normalize_input" in agent_cfg:

--- a/source/standalone/workflows/sb3/train.py
+++ b/source/standalone/workflows/sb3/train.py
@@ -59,23 +59,24 @@ from omni.isaac.orbit.utils.io import dump_pickle, dump_yaml
 import omni.isaac.contrib_envs  # noqa: F401
 import omni.isaac.orbit_envs  # noqa: F401
-from omni.isaac.orbit_envs.utils import parse_env_cfg
+from omni.isaac.orbit_envs.utils import load_cfg_from_registry, parse_env_cfg
-from omni.isaac.orbit_envs.utils.wrappers.sb3 import Sb3VecEnvWrapper
+from omni.isaac.orbit_envs.utils.wrappers.sb3 import Sb3VecEnvWrapper, process_sb3_cfg
-from config import parse_sb3_cfg
 def main():
    """Train with stable-baselines agent."""
    # parse configuration
    env_cfg = parse_env_cfg(args_cli.task, use_gpu=not args_cli.cpu, num_envs=args_cli.num_envs)
-    agent_cfg = parse_sb3_cfg(args_cli.task)
+    agent_cfg = load_cfg_from_registry(args_cli.task, "sb3_cfg_entry_point")
+    # post-process agent configuration
+    agent_cfg = process_sb3_cfg(agent_cfg)
    # override configuration with command line arguments
    if args_cli.seed is not None:
        agent_cfg["seed"] = args_cli.seed
    # directory for logging into
-    log_dir = os.path.join("logs", "sb3", args_cli.task, datetime.now().strftime("%b%d_%H-%M-%S"))
+    log_dir = os.path.join("logs", "sb3", args_cli.task, datetime.now().strftime("%Y-%m-%d_%H-%M-%S"))
    # dump the configuration into log-directory
    dump_yaml(os.path.join(log_dir, "params", "env.yaml"), env_cfg)
    dump_yaml(os.path.join(log_dir, "params", "agent.yaml"), agent_cfg)

--- a/source/standalone/workflows/skrl/config.py
+++ b/source/standalone/workflows/skrl/config.py
-# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES, ETH Zurich, and University of Toronto
-# All rights reserved.
-#
-# SPDX-License-Identifier: BSD-3-Clause
-"""Utility functions for parsing skrl configuration files."""
-from __future__ import annotations
-import os
-import yaml
-from skrl.resources.preprocessors.torch import RunningStandardScaler  # noqa: F401
-from skrl.resources.schedulers.torch import KLAdaptiveRL  # noqa: F401
-from skrl.utils.model_instantiators import Shape  # noqa: F401
-from omni.isaac.orbit_envs import ORBIT_ENVS_DATA_DIR
-__all__ = ["SKRL_PPO_CONFIG_FILE", "parse_skrl_cfg"]
-SKRL_PPO_CONFIG_FILE = {
-    # classic
-    "Isaac-Cartpole-v0": os.path.join(ORBIT_ENVS_DATA_DIR, "skrl/cartpole_ppo.yaml"),
-    "Isaac-Ant-v0": os.path.join(ORBIT_ENVS_DATA_DIR, "skrl/ant_ppo.yaml"),
-    "Isaac-Humanoid-v0": os.path.join(ORBIT_ENVS_DATA_DIR, "skrl/humanoid_ppo.yaml"),
-    # manipulation
-    "Isaac-Lift-Franka-v0": os.path.join(ORBIT_ENVS_DATA_DIR, "skrl/lift_ppo.yaml"),
-    "Isaac-Reach-Franka-v0": os.path.join(ORBIT_ENVS_DATA_DIR, "skrl/reach_ppo.yaml"),
-    # locomotion
-    "Isaac-Velocity-Anymal-C-v0": os.path.join(ORBIT_ENVS_DATA_DIR, "skrl/anymal_ppo.yaml"),
-}
-"""Mapping from environment names to PPO agent files."""
-def parse_skrl_cfg(task_name) -> dict:
-    """Parse configuration based on command line arguments.
-    Args:
-        task_name: The name of the environment.
-    Returns:
-        A dictionary containing the parsed configuration.
-    """
-    # retrieve the default environment config file
-    try:
-        config_file = SKRL_PPO_CONFIG_FILE[task_name]
-    except KeyError:
-        raise ValueError(f"Task not found: {task_name}")
-    # parse agent configuration
-    with open(config_file, encoding="utf-8") as f:
-        cfg = yaml.load(f, Loader=yaml.Loader)
-    return cfg
-def convert_skrl_cfg(cfg):
-    """Convert simple YAML types to skrl classes/components.
-    Args:
-        cfg: configuration dictionary.
-    Returns:
-        A dictionary containing the converted configuration.
-    """
-    _direct_eval = [
-        "learning_rate_scheduler",
-        "state_preprocessor",
-        "value_preprocessor",
-        "input_shape",
-        "output_shape",
-    ]
-    def reward_shaper_function(scale):
-        def reward_shaper(rewards, timestep, timesteps):
-            return rewards * scale
-        return reward_shaper
-    def update_dict(d):
-        for key, value in d.items():
-            if isinstance(value, dict):
-                update_dict(value)
-            else:
-                if key in _direct_eval:
-                    d[key] = eval(value)
-                elif key.endswith("_kwargs"):
-                    d[key] = value if value is not None else {}
-                elif key in ["rewards_shaper_scale"]:
-                    d["rewards_shaper"] = reward_shaper_function(value)
-        return d
-    # parse agent configuration and convert to classes
-    return update_dict(cfg)
--- a/source/standalone/workflows/skrl/play.py
+++ b/source/standalone/workflows/skrl/play.py
@@ -48,17 +48,15 @@ from skrl.utils.model_instantiators import deterministic_model, gaussian_model,
 import omni.isaac.contrib_envs  # noqa: F401
 import omni.isaac.orbit_envs  # noqa: F401
-from omni.isaac.orbit_envs.utils import get_checkpoint_path, parse_env_cfg
+from omni.isaac.orbit_envs.utils import get_checkpoint_path, load_cfg_from_registry, parse_env_cfg
-from omni.isaac.orbit_envs.utils.wrappers.skrl import SkrlVecEnvWrapper
+from omni.isaac.orbit_envs.utils.wrappers.skrl import SkrlVecEnvWrapper, process_skrl_cfg
-from config import convert_skrl_cfg, parse_skrl_cfg
 def main():
    """Play with skrl agent."""
    # parse env configuration
    env_cfg = parse_env_cfg(args_cli.task, use_gpu=not args_cli.cpu, num_envs=args_cli.num_envs)
-    experiment_cfg = parse_skrl_cfg(args_cli.task)
+    experiment_cfg = load_cfg_from_registry(args_cli.task, "skrl_cfg_entry_point")
    # create isaac environment
    env = gym.make(args_cli.task, cfg=env_cfg)
@@ -74,13 +72,13 @@ def main():
            observation_space=env.observation_space,
            action_space=env.action_space,
            device=env.device,
-            **convert_skrl_cfg(experiment_cfg["models"]["policy"]),
+            **process_skrl_cfg(experiment_cfg["models"]["policy"]),
        )
        models["value"] = deterministic_model(
            observation_space=env.observation_space,
            action_space=env.action_space,
            device=env.device,
-            **convert_skrl_cfg(experiment_cfg["models"]["value"]),
+            **process_skrl_cfg(experiment_cfg["models"]["value"]),
        )
    # shared models
    else:
@@ -91,8 +89,8 @@ def main():
            structure=None,
            roles=["policy", "value"],
            parameters=[
-                convert_skrl_cfg(experiment_cfg["models"]["policy"]),
+                process_skrl_cfg(experiment_cfg["models"]["policy"]),
-                convert_skrl_cfg(experiment_cfg["models"]["value"]),
+                process_skrl_cfg(experiment_cfg["models"]["value"]),
            ],
        )
        models["value"] = models["policy"]
@@ -101,7 +99,7 @@ def main():
    # https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ppo.html
    agent_cfg = PPO_DEFAULT_CONFIG.copy()
    experiment_cfg["agent"]["rewards_shaper"] = None  # avoid 'dictionary changed size during iteration'
-    agent_cfg.update(convert_skrl_cfg(experiment_cfg["agent"]))
+    agent_cfg.update(process_skrl_cfg(experiment_cfg["agent"]))
    agent_cfg["state_preprocessor_kwargs"].update({"size": env.observation_space, "device": env.device})
    agent_cfg["value_preprocessor_kwargs"].update({"size": 1, "device": env.device})

--- a/source/standalone/workflows/skrl/train.py
+++ b/source/standalone/workflows/skrl/train.py
@@ -63,10 +63,8 @@ from omni.isaac.orbit.utils.io import dump_pickle, dump_yaml
 import omni.isaac.contrib_envs  # noqa: F401
 import omni.isaac.orbit_envs  # noqa: F401
-from omni.isaac.orbit_envs.utils import parse_env_cfg
+from omni.isaac.orbit_envs.utils import load_cfg_from_registry, parse_env_cfg
-from omni.isaac.orbit_envs.utils.wrappers.skrl import SkrlSequentialLogTrainer, SkrlVecEnvWrapper
+from omni.isaac.orbit_envs.utils.wrappers.skrl import SkrlSequentialLogTrainer, SkrlVecEnvWrapper, process_skrl_cfg
-from config import convert_skrl_cfg, parse_skrl_cfg
 def main():
@@ -76,14 +74,14 @@ def main():
    # parse configuration
    env_cfg = parse_env_cfg(args_cli.task, use_gpu=not args_cli.cpu, num_envs=args_cli.num_envs)
-    experiment_cfg = parse_skrl_cfg(args_cli.task)
+    experiment_cfg = load_cfg_from_registry(args_cli.task, "skrl_cfg_entry_point")
    # specify directory for logging experiments
    log_root_path = os.path.join("logs", "skrl", experiment_cfg["agent"]["experiment"]["directory"])
    log_root_path = os.path.abspath(log_root_path)
    print(f"[INFO] Logging experiment in directory: {log_root_path}")
    # specify directory for logging runs
-    log_dir = datetime.now().strftime("%b%d_%H-%M-%S")
+    log_dir = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    if experiment_cfg["agent"]["experiment"]["experiment_name"]:
        log_dir += f'_{experiment_cfg["agent"]["experiment"]["experiment_name"]}'
    # set directory into agent config
@@ -125,13 +123,13 @@ def main():
            observation_space=env.observation_space,
            action_space=env.action_space,
            device=env.device,
-            **convert_skrl_cfg(experiment_cfg["models"]["policy"]),
+            **process_skrl_cfg(experiment_cfg["models"]["policy"]),
        )
        models["value"] = deterministic_model(
            observation_space=env.observation_space,
            action_space=env.action_space,
            device=env.device,
-            **convert_skrl_cfg(experiment_cfg["models"]["value"]),
+            **process_skrl_cfg(experiment_cfg["models"]["value"]),
        )
    # shared models
    else:
@@ -142,8 +140,8 @@ def main():
            structure=None,
            roles=["policy", "value"],
            parameters=[
-                convert_skrl_cfg(experiment_cfg["models"]["policy"]),
+                process_skrl_cfg(experiment_cfg["models"]["policy"]),
-                convert_skrl_cfg(experiment_cfg["models"]["value"]),
+                process_skrl_cfg(experiment_cfg["models"]["value"]),
            ],
        )
        models["value"] = models["policy"]
@@ -157,7 +155,7 @@ def main():
    # https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ppo.html
    agent_cfg = PPO_DEFAULT_CONFIG.copy()
    experiment_cfg["agent"]["rewards_shaper"] = None  # avoid 'dictionary changed size during iteration'
-    agent_cfg.update(convert_skrl_cfg(experiment_cfg["agent"]))
+    agent_cfg.update(process_skrl_cfg(experiment_cfg["agent"]))
    agent_cfg["state_preprocessor_kwargs"].update({"size": env.observation_space, "device": env.device})
    agent_cfg["value_preprocessor_kwargs"].update({"size": 1, "device": env.device})