Updates cartpole environment to new framework (#241)

# Description This MR provides the Cartpole environment reimplemented with the new RLTaskEnv. Replaces #177 ## Type of change - Bug fix (non-breaking change which fixes an issue) ## Screenshot Training results with SB-3 (orange), RSL-RL (purple) and RL-Games (black) ![sb3](https://github.com/isaac-orbit/orbit/assets/12863862/c4b7a50e-099a-4391-8142-e2924e633429) ![rslr-l](https://github.com/isaac-orbit/orbit/assets/12863862/aa223a46-d4a8-454a-b26d-b6359e173bbc) ![rlg](https://github.com/isaac-orbit/orbit/assets/12863862/2f20d837-40af-4fa2-ac58-7f520c4a3517) ## Checklist - [x] I have run the [`pre-commit` checks](https://pre-commit.com/) with `./orbit.sh --format` - [ ] I have made corresponding changes to the documentation - [x] My changes generate no new warnings - [ ] I have added tests that prove my fix is effective or that my feature works - [ ] I have updated the changelog and the corresponding version in the extension's `config/extension.toml` file - [x] I have added my name to the `CONTRIBUTORS.md` or my name already exists there

Updates cartpole environment to new framework (#241)
# Description This MR provides the Cartpole environment reimplemented with the new RLTaskEnv. Replaces #177 ## Type of change - Bug fix (non-breaking change which fixes an issue) ## Screenshot Training results with SB-3 (orange), RSL-RL (purple) and RL-Games (black) ![sb3](https://github.com/isaac-orbit/orbit/assets/12863862/c4b7a50e-099a-4391-8142-e2924e633429) ![rslr-l](https://github.com/isaac-orbit/orbit/assets/12863862/aa223a46-d4a8-454a-b26d-b6359e173bbc) ![rlg](https://github.com/isaac-orbit/orbit/assets/12863862/2f20d837-40af-4fa2-ac58-7f520c4a3517) ## Checklist - [x] I have run the [`pre-commit` checks](https://pre-commit.com/) with `./orbit.sh --format` - [ ] I have made corresponding changes to the documentation - [x] My changes generate no new warnings - [ ] I have added tests that prove my fix is effective or that my feature works - [ ] I have updated the changelog and the corresponding version in the extension's `config/extension.toml` file - [x] I have added my name to the `CONTRIBUTORS.md` or my name already exists there
541dd06d · Mayank Mittal · GitHub · 5d44141e · 541dd06d · 541dd06d
Unverified Commit 541dd06d authored Nov 10, 2023 by Mayank Mittal Committed by GitHub Nov 10, 2023
18 changed files
--- a/source/extensions/omni.isaac.orbit/omni/isaac/orbit/envs/mdp/randomizations.py
+++ b/source/extensions/omni.isaac.orbit/omni/isaac/orbit/envs/mdp/randomizations.py
@@ -169,14 +169,14 @@ def push_by_setting_velocity(
    asset.write_root_velocity_to_sim(vel_w, env_ids=env_ids)
-def reset_root_state(
+def reset_root_state_uniform(
    env: BaseEnv,
    env_ids: torch.Tensor,
    pose_range: dict[str, tuple[float, float]],
    velocity_range: dict[str, tuple[float, float]],
    asset_cfg: SceneEntityCfg = SceneEntityCfg("robot"),
 ):
-    """Reset the asset root state to a random position and velocity within the given ranges.
+    """Reset the asset root state to a random position and velocity uniformly within the given ranges.
    This function randomizes the root position and velocity of the asset.
@@ -245,6 +245,32 @@ def reset_joints_by_scale(
    asset.write_joint_state_to_sim(joint_pos, joint_vel, env_ids=env_ids)
+def reset_joints_by_offset(
+    env: BaseEnv,
+    env_ids: torch.Tensor,
+    position_range: tuple[float, float],
+    velocity_range: tuple[float, float],
+    asset_cfg: SceneEntityCfg = SceneEntityCfg("robot"),
+):
+    """Reset the robot joints with offsets around the default position and velocity by the given ranges.
+    This function samples random values from the given ranges and biases the default joint positions and velocities
+    by these values. The biased values are then set into the physics simulation.
+    """
+    # extract the used quantities (to enable type-hinting)
+    asset: Articulation = env.scene[asset_cfg.name]
+    # get default joint state
+    joint_pos = asset.data.default_joint_pos[env_ids].clone()
+    joint_vel = asset.data.default_joint_vel[env_ids].clone()
+    # bias these values randomly
+    joint_pos += sample_uniform(*position_range, joint_pos.shape, joint_pos.device)
+    joint_vel += sample_uniform(*velocity_range, joint_vel.shape, joint_vel.device)
+    # set into the physics simulation
+    asset.write_joint_state_to_sim(joint_pos, joint_vel, env_ids=env_ids)
 def reset_scene_to_default(env: BaseEnv, env_ids: torch.Tensor):
    """Reset the scene to the default state specified in the scene configuration."""
    # root states

--- a/source/extensions/omni.isaac.orbit/omni/isaac/orbit/envs/mdp/rewards.py
+++ b/source/extensions/omni.isaac.orbit/omni/isaac/orbit/envs/mdp/rewards.py
@@ -26,6 +26,11 @@ General.
 """
+def alive_bonus(env: RLTaskEnv) -> torch.Tensor:
+    """Reward for being alive."""
+    return ~env.reset_buf * 1.0
 def termination_penalty(env: RLTaskEnv) -> torch.Tensor:
    """Penalize terminated episodes that don't correspond to episodic timeouts."""
    return env.reset_buf * (~env.termination_manager.time_outs)
@@ -86,14 +91,21 @@ Joint penalties.
 def joint_torques_l2(env: RLTaskEnv, asset_cfg: SceneEntityCfg = SceneEntityCfg("robot")) -> torch.Tensor:
-    """Penalize torques applied on the articulation using L2-kernel."""
+    """Penalize joint torques applied on the articulation using L2-kernel."""
    # extract the used quantities (to enable type-hinting)
    asset: Articulation = env.scene[asset_cfg.name]
    return torch.sum(torch.square(asset.data.applied_torque), dim=1)
+def joint_vel_l1(env: RLTaskEnv, asset_cfg: SceneEntityCfg) -> torch.Tensor:
+    """Penalize joint velocities on the articulation using an L1-kernel."""
+    # extract the used quantities (to enable type-hinting)
+    asset: Articulation = env.scene[asset_cfg.name]
+    return torch.sum(torch.abs(asset.data.joint_vel[:, asset_cfg.joint_ids]), dim=1)
 def joint_vel_l2(env: RLTaskEnv, asset_cfg: SceneEntityCfg = SceneEntityCfg("robot")) -> torch.Tensor:
-    """Penalize joint velocities on the articulation."""
+    """Penalize joint velocities on the articulation using L2-kernel."""
    # extract the used quantities (to enable type-hinting)
    asset: Articulation = env.scene[asset_cfg.name]
    return torch.sum(torch.square(asset.data.joint_vel), dim=1)

--- a/source/extensions/omni.isaac.orbit/omni/isaac/orbit/envs/mdp/terminations.py
+++ b/source/extensions/omni.isaac.orbit/omni/isaac/orbit/envs/mdp/terminations.py
@@ -82,14 +82,30 @@ def joint_pos_limit(env: RLTaskEnv, asset_cfg: SceneEntityCfg = SceneEntityCfg("
    # extract the used quantities (to enable type-hinting)
    asset: Articulation = env.scene[asset_cfg.name]
    # compute any violations
-    out_of_upper_limits = torch.any(asset.data.joint_pos > asset.data.soft_joint_pos_limits[:, 0], dim=1)
+    out_of_upper_limits = torch.any(asset.data.joint_pos > asset.data.soft_joint_pos_limits[..., 1], dim=1)
-    out_of_lower_limits = torch.any(asset.data.joint_pos < asset.data.soft_joint_pos_limits[:, 1], dim=1)
+    out_of_lower_limits = torch.any(asset.data.joint_pos < asset.data.soft_joint_pos_limits[..., 0], dim=1)
    return torch.logical_or(out_of_upper_limits, out_of_lower_limits)
-def joint_velocity_limit(
+def joint_pos_manual_limit(
-    env: RLTaskEnv, max_velocity, asset_cfg: SceneEntityCfg = SceneEntityCfg("robot")
+    env: RLTaskEnv, bounds: tuple[float, float], asset_cfg: SceneEntityCfg = SceneEntityCfg("robot")
 ) -> torch.Tensor:
+    """Terminate when the asset's joint positions are outside of the configured bounds.
+    Note:
+        This function is similar to :func:`joint_pos_limit` but allows the user to specify the bounds manually.
+    """
+    # extract the used quantities (to enable type-hinting)
+    asset: Articulation = env.scene[asset_cfg.name]
+    if asset_cfg.joint_ids is None:
+        asset_cfg.joint_ids = slice(None)
+    # compute any violations
+    out_of_upper_limits = torch.any(asset.data.joint_pos[:, asset_cfg.joint_ids] > bounds[1], dim=1)
+    out_of_lower_limits = torch.any(asset.data.joint_pos[:, asset_cfg.joint_ids] < bounds[0], dim=1)
+    return torch.logical_or(out_of_upper_limits, out_of_lower_limits)
+def joint_vel_limit(env: RLTaskEnv, max_velocity, asset_cfg: SceneEntityCfg = SceneEntityCfg("robot")) -> torch.Tensor:
    """Terminate when the asset's joint velocities are outside of the soft joint limits."""
    # extract the used quantities (to enable type-hinting)
    asset: Articulation = env.scene[asset_cfg.name]

--- a/source/extensions/omni.isaac.orbit_tasks/omni/isaac/orbit_tasks/__init__.py
+++ b/source/extensions/omni.isaac.orbit_tasks/omni/isaac/orbit_tasks/__init__.py
@@ -49,6 +49,6 @@ __version__ = ORBIT_TASKS_METADATA["package"]["version"]
 from .utils import import_packages
 # The blacklist is used to prevent importing configs from sub-packages
-_BLACKLIST_PKGS = ["locomotion.velocity.config.anymal_d", "classic", "manipulation", "utils"]
+_BLACKLIST_PKGS = ["locomotion.velocity.config.anymal_d", "manipulation", "utils", "classic.ant", "classic.humanoid"]
 # Import all configs in this package
 import_packages(__name__, _BLACKLIST_PKGS)
--- a/source/extensions/omni.isaac.orbit_tasks/omni/isaac/orbit_tasks/classic/__init__.py
+++ b/source/extensions/omni.isaac.orbit_tasks/omni/isaac/orbit_tasks/classic/__init__.py
@@ -10,9 +10,3 @@ These environments are based on the MuJoCo environments provided by OpenAI.
 Reference:
    https://github.com/openai/gym/tree/master/gym/envs/mujoco
 """
-from .ant import AntEnv
-from .cartpole import CartpoleEnv
-from .humanoid import HumanoidEnv
-__all__ = ["CartpoleEnv", "AntEnv", "HumanoidEnv"]
--- a/source/extensions/omni.isaac.orbit_tasks/omni/isaac/orbit_tasks/classic/ant/__init__.py
+++ b/source/extensions/omni.isaac.orbit_tasks/omni/isaac/orbit_tasks/classic/ant/__init__.py
@@ -7,21 +7,21 @@
 Ant locomotion environment (similar to OpenAI Gym Ant-v2).
 """
-import gymnasium as gym
+# import gymnasium as gym
-from . import agents
+# from . import agents
 ##
 # Register Gym environments.
 ##
-gym.register(
+# gym.register(
-    id="Isaac-Ant-v0",
+#     id="Isaac-Ant-v0",
-    entry_point="omni.isaac.orbit.envs:RLTaskEnv",
+#     entry_point="omni.isaac.orbit.envs:RLTaskEnv",
-    kwargs={
+#     kwargs={
-        "env_cfg_entry_point": f"{__name__}:ant_env_cfg.yaml",
+#         "env_cfg_entry_point": f"{__name__}:ant_env_cfg.yaml",
-        "rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_ppo_cfg.yaml",
+#         "rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_ppo_cfg.yaml",
-        "skrl_cfg_entry_point": f"{agents.__name__}:skrl_ppo_cfg.yaml",
+#         "skrl_cfg_entry_point": f"{agents.__name__}:skrl_ppo_cfg.yaml",
-        "sb3_cfg_entry_point": f"{agents.__name__}:sb3_ppo_cfg.yaml",
+#         "sb3_cfg_entry_point": f"{agents.__name__}:sb3_ppo_cfg.yaml",
-    },
+#     },
-)
+# )
--- a/source/extensions/omni.isaac.orbit_tasks/omni/isaac/orbit_tasks/classic/cartpole/__init__.py
+++ b/source/extensions/omni.isaac.orbit_tasks/omni/isaac/orbit_tasks/classic/cartpole/__init__.py
@@ -10,6 +10,7 @@ Cartpole balancing environment.
 import gymnasium as gym
 from . import agents
+from .cartpole_env_cfg import CartpoleEnvCfg
 ##
 # Register Gym environments.
@@ -18,10 +19,11 @@ from . import agents
 gym.register(
    id="Isaac-Cartpole-v0",
    entry_point="omni.isaac.orbit.envs:RLTaskEnv",
+    disable_env_checker=True,
    kwargs={
-        "env_cfg_entry_point": f"{__name__}:ant_env_cfg.yaml",
+        "env_cfg_entry_point": CartpoleEnvCfg,
        "rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_ppo_cfg.yaml",
-        "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:CARTPOLE_RSL_RL_PPO_CFG",
+        "rsl_rl_cfg_entry_point": agents.rsl_rl_ppo_cfg.CartpolePPORunnerCfg,
        "skrl_cfg_entry_point": f"{agents.__name__}:skrl_ppo_cfg.yaml",
        "sb3_cfg_entry_point": f"{agents.__name__}:sb3_ppo_cfg.yaml",
    },

--- a/source/extensions/omni.isaac.orbit_tasks/omni/isaac/orbit_tasks/classic/cartpole/agents/__init__.py
+++ b/source/extensions/omni.isaac.orbit_tasks/omni/isaac/orbit_tasks/classic/cartpole/agents/__init__.py
+# Copyright (c) 2022-2023, The ORBIT Project Developers.
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+from . import rsl_rl_ppo_cfg  # noqa: F401, F403
--- a/source/extensions/omni.isaac.orbit_tasks/omni/isaac/orbit_tasks/classic/cartpole/agents/rl_games_ppo_cfg.yaml
+++ b/source/extensions/omni.isaac.orbit_tasks/omni/isaac/orbit_tasks/classic/cartpole/agents/rl_games_ppo_cfg.yaml
@@ -50,9 +50,9 @@ params:
    multi_gpu: False
    ppo: True
    mixed_precision: False
-    normalize_input: True
+    normalize_input: False
-    normalize_value: True
+    normalize_value: False
-    num_actors: -1
+    num_actors: -1  # configured from the script (based on num_envs)
    reward_shaper:
      scale_value: 1.0
    normalize_advantage: False
@@ -62,7 +62,7 @@ params:
    lr_schedule: adaptive
    kl_threshold: 0.008
    score_to_win: 20000
-    max_epochs: 450
+    max_epochs: 150
    save_best_after: 50
    save_frequency: 25
    grad_norm: 1.0
@@ -74,5 +74,5 @@ params:
    mini_epochs: 8
    critic_coef: 4
    clip_value: True
-    seq_len: 4
+    seq_length: 4
-    bounds_loss_coef: 0
+    bounds_loss_coef: 0.0001
--- a/source/extensions/omni.isaac.orbit_tasks/omni/isaac/orbit_tasks/classic/cartpole/agents/rsl_rl_ppo_cfg.py
+++ b/source/extensions/omni.isaac.orbit_tasks/omni/isaac/orbit_tasks/classic/cartpole/agents/rsl_rl_ppo_cfg.py
@@ -3,34 +3,34 @@
 #
 # SPDX-License-Identifier: BSD-3-Clause
+from omni.isaac.orbit.utils import configclass
 from omni.isaac.orbit_tasks.utils.wrappers.rsl_rl import (
    RslRlOnPolicyRunnerCfg,
    RslRlPpoActorCriticCfg,
    RslRlPpoAlgorithmCfg,
 )
-CARTPOLE_RSL_RL_PPO_CFG = RslRlOnPolicyRunnerCfg(
-    num_steps_per_env=16,
+@configclass
-    max_iterations=500,
+class CartpolePPORunnerCfg(RslRlOnPolicyRunnerCfg):
-    save_interval=50,
+    num_steps_per_env = 16
-    experiment_name="cartpole",
+    max_iterations = 150
-    run_name="",
+    save_interval = 50
-    resume=False,
+    experiment_name = "cartpole"
-    load_run=-1,
+    empirical_normalization = False
-    load_checkpoint=-1,
+    policy = RslRlPpoActorCriticCfg(
-    empirical_normalization=False,
-    policy=RslRlPpoActorCriticCfg(
        init_noise_std=1.0,
-        actor_hidden_dims=[256, 128, 64],
+        actor_hidden_dims=[32, 32],
-        critic_hidden_dims=[256, 128, 64],
+        critic_hidden_dims=[32, 32],
        activation="elu",
-    ),
+    )
-    algorithm=RslRlPpoAlgorithmCfg(
+    algorithm = RslRlPpoAlgorithmCfg(
        value_loss_coef=1.0,
        use_clipped_value_loss=True,
        clip_param=0.2,
-        entropy_coef=0.01,
+        entropy_coef=0.005,
-        num_learning_epochs=8,
+        num_learning_epochs=5,
        num_mini_batches=4,
        learning_rate=1.0e-3,
        schedule="adaptive",
@@ -38,5 +38,4 @@ CARTPOLE_RSL_RL_PPO_CFG = RslRlOnPolicyRunnerCfg(
        lam=0.95,
        desired_kl=0.01,
        max_grad_norm=1.0,
-    ),
+    )
-)
--- a/source/extensions/omni.isaac.orbit_tasks/omni/isaac/orbit_tasks/classic/cartpole/agents/sb3_ppo_cfg.yaml
+++ b/source/extensions/omni.isaac.orbit_tasks/omni/isaac/orbit_tasks/classic/cartpole/agents/sb3_ppo_cfg.yaml
 # Reference: https://github.com/DLR-RM/rl-baselines3-zoo/blob/master/hyperparams/ppo.yml#L32
 seed: 42
-# 512×500×16
+n_timesteps: !!float 1e6
-n_timesteps: !!float 2e6
 policy: 'MlpPolicy'
 n_steps: 16
-batch_size: 8192
+batch_size: 4096
 gae_lambda: 0.95
 gamma: 0.99
-n_epochs: 8
+n_epochs: 20
-ent_coef: 0.0
+ent_coef: 0.01
 learning_rate: !!float 3e-4
-clip_range: 0.2
+clip_range: !!float 0.2
 policy_kwargs: "dict(
-                  log_std_init=-2,
-                  ortho_init=False,
                  activation_fn=nn.ELU,
-                  net_arch=[32, 32]
+                  net_arch=[32, 32],
+                  squash_output=False,
                )"
+vf_coef: 1.0
-target_kl: 0.008
 max_grad_norm: 1.0
-# Uses VecNormalize class to normalize obs
-normalize_input: True
-# Uses VecNormalize class to normalize rew
-normalize_value: True
-clip_obs: 5
--- a/source/extensions/omni.isaac.orbit_tasks/omni/isaac/orbit_tasks/classic/cartpole/cartpole_cfg.yaml
+++ b/source/extensions/omni.isaac.orbit_tasks/omni/isaac/orbit_tasks/classic/cartpole/cartpole_cfg.yaml
-# parameters for the MDP
-env:
-  # general
-  num_envs: 512
-  env_spacing: 4.0
-  episode_length: 500 # sim steps
-  # step parameters
-  control_frequency_inv: 2 # 60 Hz
-  # reset parameters
-  reset_dist: 3.0
-  # actions parameters
-  max_effort: 400.0
-# parameters for setting up the scene
-scene:
-  cartpole:
-    # articulation
-    solver_position_iteration_count: 4
-    solver_velocity_iteration_count: 1
-    sleep_threshold: 0.005
-    stabilization_threshold: 0.001
-    enable_self_collisions: False
-    # per-body
-    enable_gyroscopic_forces: True
-    max_depenetration_velocity: 100.0
-    # per-shape
-    contact_offset: 0.02
-    rest_offset: 0.001
-# parameters for physics engine
-sim:
-  dt: 0.0083 # 1/120 s
-  substeps: 1
-  gravity: [0.0, 0.0, -9.81]
-  enable_scene_query_support: False
-  use_gpu_pipeline: True
-  use_flatcache: True  # deprecated from Isaac Sim 2023.1 onwards
-  use_fabric: True  # used from Isaac Sim 2023.1 onwards
-  device: "cuda:0"
-  physx:
-    # Solver settings
-    solver_type: 1
-    use_gpu: True # set to False to run on CPU
-    bounce_threshold_velocity: 0.2
-    friction_offset_threshold: 0.04
-    friction_correlation_distance: 0.025
-    enable_stabilization: True
-    # GPU buffers
-    gpu_max_rigid_contact_count: 524288
-    gpu_max_rigid_patch_count: 81920
-    gpu_found_lost_pairs_capacity: 1024
-    gpu_found_lost_aggregate_pairs_capacity: 262144
-    gpu_total_aggregate_pairs_capacity: 1024
-    gpu_max_soft_body_contacts: 1048576
-    gpu_max_particle_contacts: 1048576
-    gpu_heap_capacity: 67108864
-    gpu_temp_buffer_capacity: 16777216
-    gpu_max_num_partitions: 8
--- a/source/extensions/omni.isaac.orbit_tasks/omni/isaac/orbit_tasks/classic/cartpole/cartpole_env.py
+++ b/source/extensions/omni.isaac.orbit_tasks/omni/isaac/orbit_tasks/classic/cartpole/cartpole_env.py
-# Copyright (c) 2022-2023, The ORBIT Project Developers.
-# All rights reserved.
-#
-# SPDX-License-Identifier: BSD-3-Clause
-from __future__ import annotations
-import gymnasium as gym
-import math
-import torch
-import omni.isaac.core.utils.nucleus as nucleus_utils
-import omni.isaac.core.utils.prims as prim_utils
-from omni.isaac.core.articulations import ArticulationView
-import omni.isaac.orbit.compat.utils.kit as kit_utils
-from omni.isaac.orbit_tasks.isaac_env import IsaacEnv, VecEnvIndices, VecEnvObs
-from omni.isaac.orbit_tasks.isaac_env_cfg import EnvCfg, IsaacEnvCfg
-class CartpoleEnv(IsaacEnv):
-    """Environment for 2-D cartpole.
-    Reference:
-        https://github.com/openai/gym/blob/master/gym/envs/classic_control/cartpole.py
-    """
-    def __init__(self, cfg: dict, **kwargs):
-        """Initializes the environment.
-        Args:
-            cfg: The configuration dictionary.
-            kwargs: Additional keyword arguments. See IsaacEnv for more details.
-        """
-        # copy configuration
-        self.cfg_dict = cfg.copy()
-        # configuration for the environment
-        isaac_cfg = IsaacEnvCfg(
-            env=EnvCfg(num_envs=self.cfg_dict["env"]["num_envs"], env_spacing=self.cfg_dict["env"]["env_spacing"])
-        )
-        isaac_cfg.sim.from_dict(self.cfg_dict["sim"])
-        # initialize the base class to setup the scene.
-        super().__init__(isaac_cfg, **kwargs)
-        # define views over instances
-        self.cartpoles = ArticulationView(prim_paths_expr=self.env_ns + "/.*/Cartpole", reset_xform_properties=False)
-        # play the simulator to activate physics handles
-        # note: this activates the physics simulation view that exposes TensorAPIs
-        self.sim.reset()
-        # initialize all the handles
-        self.cartpoles.initialize(self.sim.physics_sim_view)
-        # set the default state
-        self.cartpoles.post_reset()
-        # get quantities from scene we care about
-        self._cart_dof_idx = self.cartpoles.get_dof_index("cartJoint")
-        self._pole_dof_idx = self.cartpoles.get_dof_index("poleJoint")
-        # compute the observation space
-        self.observation_space = gym.spaces.Box(low=-math.inf, high=math.inf, shape=(4,))
-        # compute the action space
-        self.action_space = gym.spaces.Box(low=-1.0, high=1.0, shape=(1,))
-        # store maximum episode length
-        self.max_episode_length = self.cfg_dict["env"]["episode_length"]
-    """
-    Implementation specifics.
-    """
-    def _design_scene(self) -> list[str]:
-        # get nucleus assets path
-        assets_root_path = nucleus_utils.get_assets_root_path()
-        if assets_root_path is None:
-            raise RuntimeError(
-                "Unable to access the Nucleus server from Omniverse. For more information, please check:"
-                " https://docs.omniverse.nvidia.com/app_isaacsim/app_isaacsim/overview.html#omniverse-nucleus"
-            )
-        # ground plane
-        kit_utils.create_ground_plane("/World/defaultGroundPlane")
-        # robot
-        robot_usd_path = assets_root_path + "/Isaac/Robots/Cartpole/cartpole.usd"
-        prim_utils.create_prim(
-            prim_path=self.template_env_ns + "/Cartpole", usd_path=robot_usd_path, translation=(0.0, 0.0, 2.0)
-        )
-        # apply articulation settings
-        kit_utils.set_articulation_properties(
-            prim_path=self.template_env_ns + "/Cartpole",
-            solver_position_iteration_count=self.cfg_dict["scene"]["cartpole"]["solver_position_iteration_count"],
-            solver_velocity_iteration_count=self.cfg_dict["scene"]["cartpole"]["solver_velocity_iteration_count"],
-            sleep_threshold=self.cfg_dict["scene"]["cartpole"]["sleep_threshold"],
-            stabilization_threshold=self.cfg_dict["scene"]["cartpole"]["stabilization_threshold"],
-            enable_self_collisions=self.cfg_dict["scene"]["cartpole"]["enable_self_collisions"],
-        )
-        # apply rigid body settings
-        kit_utils.set_nested_rigid_body_properties(
-            prim_path=self.template_env_ns + "/Cartpole",
-            enable_gyroscopic_forces=self.cfg_dict["scene"]["cartpole"]["enable_gyroscopic_forces"],
-            max_depenetration_velocity=self.cfg_dict["scene"]["cartpole"]["max_depenetration_velocity"],
-        )
-        # apply collider properties
-        kit_utils.set_nested_collision_properties(
-            prim_path=self.template_env_ns + "/Cartpole",
-            contact_offset=self.cfg_dict["scene"]["cartpole"]["contact_offset"],
-            rest_offset=self.cfg_dict["scene"]["cartpole"]["rest_offset"],
-        )
-        # return global prims
-        return ["/World/defaultGroundPlane"]
-    def _reset_idx(self, env_ids: VecEnvIndices):
-        # get num envs to reset
-        num_resets = len(env_ids)
-        # randomize the MDP
-        # -- DOF position
-        dof_pos = torch.zeros((num_resets, self.cartpoles.num_dof), device=self.device)
-        dof_pos[:, self._cart_dof_idx] = 1.0 * (1.0 - 2.0 * torch.rand(num_resets, device=self.device))
-        dof_pos[:, self._pole_dof_idx] = 0.125 * math.pi * (1.0 - 2.0 * torch.rand(num_resets, device=self.device))
-        self.cartpoles.set_joint_positions(dof_pos, indices=env_ids)
-        # -- DOF velocity
-        dof_vel = torch.zeros((num_resets, self.cartpoles.num_dof), device=self.device)
-        dof_vel[:, self._cart_dof_idx] = 0.5 * (1.0 - 2.0 * torch.rand(num_resets, device=self.device))
-        dof_vel[:, self._pole_dof_idx] = 0.25 * math.pi * (1.0 - 2.0 * torch.rand(num_resets, device=self.device))
-        self.cartpoles.set_joint_velocities(dof_vel, indices=env_ids)
-        # -- MDP reset
-        self.reset_buf[env_ids] = 0
-        self.episode_length_buf[env_ids] = 0
-    def _step_impl(self, actions: torch.Tensor):
-        # reset environments that terminated
-        reset_env_ids = self.reset_buf.nonzero(as_tuple=False).squeeze(-1)
-        if len(reset_env_ids) > 0:
-            self._reset_idx(reset_env_ids)
-        # increment the number of steps
-        self.episode_length_buf += 1
-        # pre-step: set actions into buffer
-        self.actions = actions.clone().to(device=self.device)
-        dof_forces = torch.zeros(
-            (self.cartpoles.count, self.cartpoles.num_dof), dtype=torch.float32, device=self.device
-        )
-        dof_forces[:, self._cart_dof_idx] = self.cfg_dict["env"]["max_effort"] * self.actions[:, 0]
-        indices = torch.arange(self.cartpoles.count, dtype=torch.int32, device=self.device)
-        self.cartpoles.set_joint_efforts(dof_forces, indices=indices)
-        # perform physics stepping
-        for _ in range(self.cfg_dict["env"]["control_frequency_inv"]):
-            # step simulation
-            self.sim.step(render=self.enable_render)
-            # check that simulation is playing
-            if self.sim.is_stopped():
-                return
-        # post-step: compute MDP
-        self._compute_rewards()
-        self._check_termination()
-        # add information to extra if timeout occurred due to episode length
-        # Note: this is used by algorithms like PPO where time-outs are handled differently
-        # For more info: https://github.com/DLR-RM/stable-baselines3/issues/633
-        self.extras["time_outs"] = self.episode_length_buf >= self.cfg_dict["env"]["episode_length"]
-    def _get_observations(self) -> VecEnvObs:
-        # access buffers from simulator
-        dof_pos = self.cartpoles.get_joint_positions(clone=False)
-        dof_vel = self.cartpoles.get_joint_velocities(clone=False)
-        # concatenate and return
-        obs_buf = torch.cat([dof_pos, dof_vel], dim=-1)
-        return {"policy": obs_buf}
-    """
-    Helper functions.
-    """
-    def _compute_rewards(self) -> None:
-        # access buffers from simulator
-        dof_pos = self.cartpoles.get_joint_positions(clone=False)
-        dof_vel = self.cartpoles.get_joint_velocities(clone=False)
-        # extract values from buffer
-        cart_pos = dof_pos[:, self._cart_dof_idx]
-        pole_pos = dof_pos[:, self._pole_dof_idx]
-        cart_vel = dof_vel[:, self._cart_dof_idx]
-        pole_vel = dof_vel[:, self._pole_dof_idx]
-        # compute reward
-        reward = 1.0 - pole_pos * pole_pos - 0.01 * torch.abs(cart_vel) - 0.005 * torch.abs(pole_vel)
-        reward = torch.where(
-            torch.abs(cart_pos) > self.cfg_dict["env"]["reset_dist"], torch.ones_like(reward) * -2.0, reward
-        )
-        reward = torch.where(torch.abs(pole_pos) > math.pi / 2, torch.ones_like(reward) * -2.0, reward)
-        # set reward into buffer
-        self.reward_buf[:] = reward
-    def _check_termination(self) -> None:
-        # access buffers from simulator
-        dof_pos = self.cartpoles.get_joint_positions(clone=False)
-        # extract values from buffer
-        cart_pos = dof_pos[:, self._cart_dof_idx]
-        pole_pos = dof_pos[:, self._pole_dof_idx]
-        # compute resets
-        # -- cart moved towards the edges
-        resets = torch.where(torch.abs(cart_pos) > self.cfg_dict["env"]["reset_dist"], 1, 0)
-        # -- pole fell down
-        resets = torch.where(torch.abs(pole_pos) > math.pi / 2, 1, resets)
-        # -- episode length
-        resets = torch.where(self.episode_length_buf >= self.max_episode_length, 1, resets)
-        # set resets into buffer
-        self.reset_buf[:] = resets
--- a/source/extensions/omni.isaac.orbit_tasks/omni/isaac/orbit_tasks/classic/cartpole/cartpole_env_cfg.py
+++ b/source/extensions/omni.isaac.orbit_tasks/omni/isaac/orbit_tasks/classic/cartpole/cartpole_env_cfg.py
+# Copyright (c) 2022-2023, The ORBIT Project Developers.
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+import math
+from omni.isaac.orbit_assets import ORBIT_ASSETS_DATA_DIR
+import omni.isaac.orbit.sim as sim_utils
+from omni.isaac.orbit.actuators import ImplicitActuatorCfg
+from omni.isaac.orbit.assets import ArticulationCfg, AssetBaseCfg
+from omni.isaac.orbit.command_generators.command_generator_cfg import NullCommandGeneratorCfg
+from omni.isaac.orbit.envs import RLTaskEnvCfg
+from omni.isaac.orbit.managers import ObservationGroupCfg as ObsGroup
+from omni.isaac.orbit.managers import ObservationTermCfg as ObsTerm
+from omni.isaac.orbit.managers import RandomizationTermCfg as RandTerm
+from omni.isaac.orbit.managers import RewardTermCfg as RewTerm
+from omni.isaac.orbit.managers import SceneEntityCfg
+from omni.isaac.orbit.managers import TerminationTermCfg as DoneTerm
+from omni.isaac.orbit.scene import InteractiveSceneCfg
+from omni.isaac.orbit.utils import configclass
+import omni.isaac.orbit_tasks.classic.cartpole.mdp as mdp
+##
+# Scene definition
+##
+@configclass
+class CartpoleSceneCfg(InteractiveSceneCfg):
+    """Configuration for the cartpole scene."""
+    # ground plane
+    ground = AssetBaseCfg(
+        prim_path="/World/ground",
+        spawn=sim_utils.GroundPlaneCfg(size=(100.0, 100.0)),
+    )
+    # robots
+    robot = ArticulationCfg(
+        prim_path="{ENV_REGEX_NS}/Robot",
+        spawn=sim_utils.UsdFileCfg(
+            usd_path=f"{ORBIT_ASSETS_DATA_DIR}/Robots/Classic/Cartpole/cartpole.usd",
+            rigid_props=sim_utils.RigidBodyPropertiesCfg(
+                max_linear_velocity=1000.0,
+                max_angular_velocity=1000.0,
+                max_depenetration_velocity=100.0,
+                enable_gyroscopic_forces=True,
+            ),
+            articulation_props=sim_utils.ArticulationRootPropertiesCfg(
+                enabled_self_collisions=False,
+                solver_position_iteration_count=4,
+                solver_velocity_iteration_count=0,
+                sleep_threshold=0.005,
+                stabilization_threshold=0.001,
+            ),
+        ),
+        init_state=ArticulationCfg.InitialStateCfg(
+            pos=(0.0, 0.0, 2.0), joint_pos={"slider_to_cart": 0.0, "cart_to_pole": 0.0}
+        ),
+        actuators={
+            "cart_actuator": ImplicitActuatorCfg(
+                joint_names_expr=["slider_to_cart"],
+                effort_limit=400.0,
+                velocity_limit=100.0,
+                stiffness=0.0,
+                damping=10.0,
+            ),
+            "pole_actuator": ImplicitActuatorCfg(
+                joint_names_expr=["cart_to_pole"], effort_limit=400.0, velocity_limit=100.0, stiffness=0.0, damping=0.0
+            ),
+        },
+    )
+    # lights
+    dome_light = AssetBaseCfg(
+        prim_path="/World/DomeLight",
+        spawn=sim_utils.DomeLightCfg(color=(0.9, 0.9, 0.9), intensity=500.0),
+    )
+    distant_light = AssetBaseCfg(
+        prim_path="/World/DistantLight",
+        spawn=sim_utils.DistantLightCfg(color=(0.9, 0.9, 0.9), intensity=2500.0),
+        init_state=AssetBaseCfg.InitialStateCfg(rot=(0.738, 0.477, 0.477, 0.0)),
+    )
+##
+# MDP settings
+##
+@configclass
+class ActionsCfg:
+    """Action specifications for the MDP."""
+    joint_effort = mdp.JointEffortActionCfg(asset_name="robot", joint_names=["slider_to_cart"], scale=100.0)
+@configclass
+class ObservationsCfg:
+    """Observation specifications for the MDP."""
+    @configclass
+    class PolicyCfg(ObsGroup):
+        """Observations for policy group."""
+        # observation terms (order preserved)
+        joint_pos_rel = ObsTerm(func=mdp.joint_pos_rel)
+        joint_vel_rel = ObsTerm(func=mdp.joint_vel_rel)
+        def __post_init__(self) -> None:
+            self.enable_corruption = False
+            self.concatenate_terms = True
+    # observation groups
+    policy: PolicyCfg = PolicyCfg()
+@configclass
+class RandomizationCfg:
+    """Configuration for randomization."""
+    # reset
+    reset_cart_position = RandTerm(
+        func=mdp.reset_joints_by_offset,
+        mode="reset",
+        params={
+            "asset_cfg": SceneEntityCfg("robot", joint_names=["slider_to_cart"]),
+            "position_range": (-1.0, 1.0),
+            "velocity_range": (-0.5, 0.5),
+        },
+    )
+    reset_pole_position = RandTerm(
+        func=mdp.reset_joints_by_offset,
+        mode="reset",
+        params={
+            "asset_cfg": SceneEntityCfg("robot", joint_names=["cart_to_pole"]),
+            "position_range": (-0.25 * math.pi, 0.25 * math.pi),
+            "velocity_range": (-0.25 * math.pi, 0.25 * math.pi),
+        },
+    )
+@configclass
+class RewardsCfg:
+    """Reward terms for the MDP."""
+    # (1) Constant running reward
+    alive = RewTerm(func=mdp.alive_bonus, weight=1.0)
+    # (2) Failure penalty
+    terminating = RewTerm(func=mdp.termination_penalty, weight=-2.0)
+    # (3) Primary task: keep pole upright
+    pole_pos = RewTerm(
+        func=mdp.joint_pos_target_l2,
+        weight=-1.0,
+        params={"asset_cfg": SceneEntityCfg("robot", joint_names=["cart_to_pole"]), "target": 0.0},
+    )
+    # (4) Shaping tasks: lower cart velocity
+    cart_vel = RewTerm(
+        func=mdp.joint_vel_l1,
+        weight=-0.01,
+        params={"asset_cfg": SceneEntityCfg("robot", joint_names=["slider_to_cart"])},
+    )
+    # (5) Shaping tasks: lower pole angular velocity
+    pole_vel = RewTerm(
+        func=mdp.joint_vel_l1,
+        weight=-0.005,
+        params={"asset_cfg": SceneEntityCfg("robot", joint_names=["cart_to_pole"])},
+    )
+@configclass
+class TerminationsCfg:
+    """Termination terms for the MDP."""
+    # (1) Time out
+    time_out = DoneTerm(func=mdp.time_out, time_out=True)
+    # (2) Cart out of bounds
+    cart_out_of_bounds = DoneTerm(
+        func=mdp.joint_pos_manual_limit,
+        params={"asset_cfg": SceneEntityCfg("robot", joint_names=["slider_to_cart"]), "bounds": (-3.0, 3.0)},
+    )
+@configclass
+class CurriculumCfg:
+    """Configuration for the curriculum."""
+    pass
+##
+# Environment configuration
+##
+@configclass
+class CartpoleEnvCfg(RLTaskEnvCfg):
+    """Configuration for the locomotion velocity-tracking environment."""
+    # Scene settings
+    scene: CartpoleSceneCfg = CartpoleSceneCfg(num_envs=4096, env_spacing=4.0, replicate_physics=True)
+    # Basic settings
+    observations: ObservationsCfg = ObservationsCfg()
+    actions: ActionsCfg = ActionsCfg()
+    randomization: RandomizationCfg = RandomizationCfg()
+    # MDP settings
+    curriculum: CurriculumCfg = CurriculumCfg()
+    rewards: RewardsCfg = RewardsCfg()
+    terminations: TerminationsCfg = TerminationsCfg()
+    # No command generator
+    commands: NullCommandGeneratorCfg = NullCommandGeneratorCfg()
+    def __post_init__(self) -> None:
+        """Post initialization."""
+        # general settings
+        self.decimation = 2
+        self.episode_length_s = 5
+        # viewer settings
+        self.viewer.eye = (8.0, 0.0, 5.0)
+        # simulation settings
+        self.sim.dt = 1 / 120
--- a/source/extensions/omni.isaac.orbit_tasks/omni/isaac/orbit_tasks/classic/cartpole/mdp/__init__.py
+++ b/source/extensions/omni.isaac.orbit_tasks/omni/isaac/orbit_tasks/classic/cartpole/mdp/__init__.py
+# Copyright (c) 2022-2023, The ORBIT Project Developers.
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+"""This sub-module contains the functions that are specific to the cartpole environments."""
+from omni.isaac.orbit.envs.mdp import *  # noqa: F401, F403
+from .rewards import *  # noqa: F401, F403
--- a/source/extensions/omni.isaac.orbit_tasks/omni/isaac/orbit_tasks/classic/cartpole/mdp/rewards.py
+++ b/source/extensions/omni.isaac.orbit_tasks/omni/isaac/orbit_tasks/classic/cartpole/mdp/rewards.py
+# Copyright (c) 2022-2023, The ORBIT Project Developers.
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+from __future__ import annotations
+import torch
+from typing import TYPE_CHECKING
+from omni.isaac.orbit.assets import Articulation
+from omni.isaac.orbit.managers import SceneEntityCfg
+from omni.isaac.orbit.utils.math import wrap_to_pi
+if TYPE_CHECKING:
+    from omni.isaac.orbit.envs import RLTaskEnv
+def joint_pos_target_l2(env: RLTaskEnv, target: float, asset_cfg: SceneEntityCfg) -> torch.Tensor:
+    """Penalize joint position deviation from a target value."""
+    # extract the used quantities (to enable type-hinting)
+    asset: Articulation = env.scene[asset_cfg.name]
+    # wrap the joint positions to (-pi, pi)
+    joint_pos = wrap_to_pi(asset.data.joint_pos[:, asset_cfg.joint_ids])
+    # compute the reward
+    return torch.sum(torch.square(joint_pos - target), dim=1)
--- a/source/extensions/omni.isaac.orbit_tasks/omni/isaac/orbit_tasks/classic/humanoid/__init__.py
+++ b/source/extensions/omni.isaac.orbit_tasks/omni/isaac/orbit_tasks/classic/humanoid/__init__.py
@@ -7,21 +7,21 @@
 Humanoid locomotion environment (similar to OpenAI Gym Humanoid-v2).
 """
-import gymnasium as gym
+# import gymnasium as gym
-from . import agents
+# from . import agents
 ##
 # Register Gym environments.
 ##
-gym.register(
+# gym.register(
-    id="Isaac-Humanoid-v0",
+#     id="Isaac-Humanoid-v0",
-    entry_point="omni.isaac.orbit.envs:RLTaskEnv",
+#     entry_point="omni.isaac.orbit.envs:RLTaskEnv",
-    kwargs={
+#     kwargs={
-        "env_cfg_entry_point": f"{__name__}:ant_env_cfg.yaml",
+#         "env_cfg_entry_point": f"{__name__}:ant_env_cfg.yaml",
-        "rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_ppo_cfg.yaml",
+#         "rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_ppo_cfg.yaml",
-        "skrl_cfg_entry_point": f"{agents.__name__}:skrl_ppo_cfg.yaml",
+#         "skrl_cfg_entry_point": f"{agents.__name__}:skrl_ppo_cfg.yaml",
-        "sb3_cfg_entry_point": f"{agents.__name__}:sb3_ppo_cfg.yaml",
+#         "sb3_cfg_entry_point": f"{agents.__name__}:sb3_ppo_cfg.yaml",
-    },
+#     },
-)
+# )
--- a/source/extensions/omni.isaac.orbit_tasks/omni/isaac/orbit_tasks/locomotion/velocity/velocity_env_cfg.py
+++ b/source/extensions/omni.isaac.orbit_tasks/omni/isaac/orbit_tasks/locomotion/velocity/velocity_env_cfg.py
@@ -157,7 +157,7 @@ class RandomizationCfg:
    )
    reset_base = RandTerm(
-        func=mdp.reset_root_state,
+        func=mdp.reset_root_state_uniform,
        mode="reset",
        params={
            "pose_range": {"x": (-0.5, 0.5), "y": (-0.5, 0.5), "yaw": (-3.14, 3.14)},