Unverified Commit 541dd06d authored by Mayank Mittal's avatar Mayank Mittal Committed by GitHub

Updates cartpole environment to new framework (#241)

# Description

This MR provides the Cartpole environment reimplemented with the new
RLTaskEnv.

Replaces #177

## Type of change

- Bug fix (non-breaking change which fixes an issue)

## Screenshot

Training results with SB-3 (orange), RSL-RL (purple) and RL-Games
(black)


![sb3](https://github.com/isaac-orbit/orbit/assets/12863862/c4b7a50e-099a-4391-8142-e2924e633429)

![rslr-l](https://github.com/isaac-orbit/orbit/assets/12863862/aa223a46-d4a8-454a-b26d-b6359e173bbc)

![rlg](https://github.com/isaac-orbit/orbit/assets/12863862/2f20d837-40af-4fa2-ac58-7f520c4a3517)


## Checklist

- [x] I have run the [`pre-commit` checks](https://pre-commit.com/) with
`./orbit.sh --format`
- [ ] I have made corresponding changes to the documentation
- [x] My changes generate no new warnings
- [ ] I have added tests that prove my fix is effective or that my
feature works
- [ ] I have updated the changelog and the corresponding version in the
extension's `config/extension.toml` file
- [x] I have added my name to the `CONTRIBUTORS.md` or my name already
exists there
parent 5d44141e
......@@ -169,14 +169,14 @@ def push_by_setting_velocity(
asset.write_root_velocity_to_sim(vel_w, env_ids=env_ids)
def reset_root_state(
def reset_root_state_uniform(
env: BaseEnv,
env_ids: torch.Tensor,
pose_range: dict[str, tuple[float, float]],
velocity_range: dict[str, tuple[float, float]],
asset_cfg: SceneEntityCfg = SceneEntityCfg("robot"),
):
"""Reset the asset root state to a random position and velocity within the given ranges.
"""Reset the asset root state to a random position and velocity uniformly within the given ranges.
This function randomizes the root position and velocity of the asset.
......@@ -245,6 +245,32 @@ def reset_joints_by_scale(
asset.write_joint_state_to_sim(joint_pos, joint_vel, env_ids=env_ids)
def reset_joints_by_offset(
env: BaseEnv,
env_ids: torch.Tensor,
position_range: tuple[float, float],
velocity_range: tuple[float, float],
asset_cfg: SceneEntityCfg = SceneEntityCfg("robot"),
):
"""Reset the robot joints with offsets around the default position and velocity by the given ranges.
This function samples random values from the given ranges and biases the default joint positions and velocities
by these values. The biased values are then set into the physics simulation.
"""
# extract the used quantities (to enable type-hinting)
asset: Articulation = env.scene[asset_cfg.name]
# get default joint state
joint_pos = asset.data.default_joint_pos[env_ids].clone()
joint_vel = asset.data.default_joint_vel[env_ids].clone()
# bias these values randomly
joint_pos += sample_uniform(*position_range, joint_pos.shape, joint_pos.device)
joint_vel += sample_uniform(*velocity_range, joint_vel.shape, joint_vel.device)
# set into the physics simulation
asset.write_joint_state_to_sim(joint_pos, joint_vel, env_ids=env_ids)
def reset_scene_to_default(env: BaseEnv, env_ids: torch.Tensor):
"""Reset the scene to the default state specified in the scene configuration."""
# root states
......
......@@ -26,6 +26,11 @@ General.
"""
def alive_bonus(env: RLTaskEnv) -> torch.Tensor:
"""Reward for being alive."""
return ~env.reset_buf * 1.0
def termination_penalty(env: RLTaskEnv) -> torch.Tensor:
"""Penalize terminated episodes that don't correspond to episodic timeouts."""
return env.reset_buf * (~env.termination_manager.time_outs)
......@@ -86,14 +91,21 @@ Joint penalties.
def joint_torques_l2(env: RLTaskEnv, asset_cfg: SceneEntityCfg = SceneEntityCfg("robot")) -> torch.Tensor:
"""Penalize torques applied on the articulation using L2-kernel."""
"""Penalize joint torques applied on the articulation using L2-kernel."""
# extract the used quantities (to enable type-hinting)
asset: Articulation = env.scene[asset_cfg.name]
return torch.sum(torch.square(asset.data.applied_torque), dim=1)
def joint_vel_l1(env: RLTaskEnv, asset_cfg: SceneEntityCfg) -> torch.Tensor:
"""Penalize joint velocities on the articulation using an L1-kernel."""
# extract the used quantities (to enable type-hinting)
asset: Articulation = env.scene[asset_cfg.name]
return torch.sum(torch.abs(asset.data.joint_vel[:, asset_cfg.joint_ids]), dim=1)
def joint_vel_l2(env: RLTaskEnv, asset_cfg: SceneEntityCfg = SceneEntityCfg("robot")) -> torch.Tensor:
"""Penalize joint velocities on the articulation."""
"""Penalize joint velocities on the articulation using L2-kernel."""
# extract the used quantities (to enable type-hinting)
asset: Articulation = env.scene[asset_cfg.name]
return torch.sum(torch.square(asset.data.joint_vel), dim=1)
......
......@@ -82,14 +82,30 @@ def joint_pos_limit(env: RLTaskEnv, asset_cfg: SceneEntityCfg = SceneEntityCfg("
# extract the used quantities (to enable type-hinting)
asset: Articulation = env.scene[asset_cfg.name]
# compute any violations
out_of_upper_limits = torch.any(asset.data.joint_pos > asset.data.soft_joint_pos_limits[:, 0], dim=1)
out_of_lower_limits = torch.any(asset.data.joint_pos < asset.data.soft_joint_pos_limits[:, 1], dim=1)
out_of_upper_limits = torch.any(asset.data.joint_pos > asset.data.soft_joint_pos_limits[..., 1], dim=1)
out_of_lower_limits = torch.any(asset.data.joint_pos < asset.data.soft_joint_pos_limits[..., 0], dim=1)
return torch.logical_or(out_of_upper_limits, out_of_lower_limits)
def joint_velocity_limit(
env: RLTaskEnv, max_velocity, asset_cfg: SceneEntityCfg = SceneEntityCfg("robot")
def joint_pos_manual_limit(
env: RLTaskEnv, bounds: tuple[float, float], asset_cfg: SceneEntityCfg = SceneEntityCfg("robot")
) -> torch.Tensor:
"""Terminate when the asset's joint positions are outside of the configured bounds.
Note:
This function is similar to :func:`joint_pos_limit` but allows the user to specify the bounds manually.
"""
# extract the used quantities (to enable type-hinting)
asset: Articulation = env.scene[asset_cfg.name]
if asset_cfg.joint_ids is None:
asset_cfg.joint_ids = slice(None)
# compute any violations
out_of_upper_limits = torch.any(asset.data.joint_pos[:, asset_cfg.joint_ids] > bounds[1], dim=1)
out_of_lower_limits = torch.any(asset.data.joint_pos[:, asset_cfg.joint_ids] < bounds[0], dim=1)
return torch.logical_or(out_of_upper_limits, out_of_lower_limits)
def joint_vel_limit(env: RLTaskEnv, max_velocity, asset_cfg: SceneEntityCfg = SceneEntityCfg("robot")) -> torch.Tensor:
"""Terminate when the asset's joint velocities are outside of the soft joint limits."""
# extract the used quantities (to enable type-hinting)
asset: Articulation = env.scene[asset_cfg.name]
......
......@@ -49,6 +49,6 @@ __version__ = ORBIT_TASKS_METADATA["package"]["version"]
from .utils import import_packages
# The blacklist is used to prevent importing configs from sub-packages
_BLACKLIST_PKGS = ["locomotion.velocity.config.anymal_d", "classic", "manipulation", "utils"]
_BLACKLIST_PKGS = ["locomotion.velocity.config.anymal_d", "manipulation", "utils", "classic.ant", "classic.humanoid"]
# Import all configs in this package
import_packages(__name__, _BLACKLIST_PKGS)
......@@ -10,9 +10,3 @@ These environments are based on the MuJoCo environments provided by OpenAI.
Reference:
https://github.com/openai/gym/tree/master/gym/envs/mujoco
"""
from .ant import AntEnv
from .cartpole import CartpoleEnv
from .humanoid import HumanoidEnv
__all__ = ["CartpoleEnv", "AntEnv", "HumanoidEnv"]
......@@ -7,21 +7,21 @@
Ant locomotion environment (similar to OpenAI Gym Ant-v2).
"""
import gymnasium as gym
# import gymnasium as gym
from . import agents
# from . import agents
##
# Register Gym environments.
##
gym.register(
id="Isaac-Ant-v0",
entry_point="omni.isaac.orbit.envs:RLTaskEnv",
kwargs={
"env_cfg_entry_point": f"{__name__}:ant_env_cfg.yaml",
"rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_ppo_cfg.yaml",
"skrl_cfg_entry_point": f"{agents.__name__}:skrl_ppo_cfg.yaml",
"sb3_cfg_entry_point": f"{agents.__name__}:sb3_ppo_cfg.yaml",
},
)
# gym.register(
# id="Isaac-Ant-v0",
# entry_point="omni.isaac.orbit.envs:RLTaskEnv",
# kwargs={
# "env_cfg_entry_point": f"{__name__}:ant_env_cfg.yaml",
# "rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_ppo_cfg.yaml",
# "skrl_cfg_entry_point": f"{agents.__name__}:skrl_ppo_cfg.yaml",
# "sb3_cfg_entry_point": f"{agents.__name__}:sb3_ppo_cfg.yaml",
# },
# )
......@@ -10,6 +10,7 @@ Cartpole balancing environment.
import gymnasium as gym
from . import agents
from .cartpole_env_cfg import CartpoleEnvCfg
##
# Register Gym environments.
......@@ -18,10 +19,11 @@ from . import agents
gym.register(
id="Isaac-Cartpole-v0",
entry_point="omni.isaac.orbit.envs:RLTaskEnv",
disable_env_checker=True,
kwargs={
"env_cfg_entry_point": f"{__name__}:ant_env_cfg.yaml",
"env_cfg_entry_point": CartpoleEnvCfg,
"rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_ppo_cfg.yaml",
"rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:CARTPOLE_RSL_RL_PPO_CFG",
"rsl_rl_cfg_entry_point": agents.rsl_rl_ppo_cfg.CartpolePPORunnerCfg,
"skrl_cfg_entry_point": f"{agents.__name__}:skrl_ppo_cfg.yaml",
"sb3_cfg_entry_point": f"{agents.__name__}:sb3_ppo_cfg.yaml",
},
......
# Copyright (c) 2022-2023, The ORBIT Project Developers.
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
from . import rsl_rl_ppo_cfg # noqa: F401, F403
......@@ -50,9 +50,9 @@ params:
multi_gpu: False
ppo: True
mixed_precision: False
normalize_input: True
normalize_value: True
num_actors: -1
normalize_input: False
normalize_value: False
num_actors: -1 # configured from the script (based on num_envs)
reward_shaper:
scale_value: 1.0
normalize_advantage: False
......@@ -62,7 +62,7 @@ params:
lr_schedule: adaptive
kl_threshold: 0.008
score_to_win: 20000
max_epochs: 450
max_epochs: 150
save_best_after: 50
save_frequency: 25
grad_norm: 1.0
......@@ -74,5 +74,5 @@ params:
mini_epochs: 8
critic_coef: 4
clip_value: True
seq_len: 4
bounds_loss_coef: 0
seq_length: 4
bounds_loss_coef: 0.0001
......@@ -3,34 +3,34 @@
#
# SPDX-License-Identifier: BSD-3-Clause
from omni.isaac.orbit.utils import configclass
from omni.isaac.orbit_tasks.utils.wrappers.rsl_rl import (
RslRlOnPolicyRunnerCfg,
RslRlPpoActorCriticCfg,
RslRlPpoAlgorithmCfg,
)
CARTPOLE_RSL_RL_PPO_CFG = RslRlOnPolicyRunnerCfg(
num_steps_per_env=16,
max_iterations=500,
save_interval=50,
experiment_name="cartpole",
run_name="",
resume=False,
load_run=-1,
load_checkpoint=-1,
empirical_normalization=False,
policy=RslRlPpoActorCriticCfg(
@configclass
class CartpolePPORunnerCfg(RslRlOnPolicyRunnerCfg):
num_steps_per_env = 16
max_iterations = 150
save_interval = 50
experiment_name = "cartpole"
empirical_normalization = False
policy = RslRlPpoActorCriticCfg(
init_noise_std=1.0,
actor_hidden_dims=[256, 128, 64],
critic_hidden_dims=[256, 128, 64],
actor_hidden_dims=[32, 32],
critic_hidden_dims=[32, 32],
activation="elu",
),
algorithm=RslRlPpoAlgorithmCfg(
)
algorithm = RslRlPpoAlgorithmCfg(
value_loss_coef=1.0,
use_clipped_value_loss=True,
clip_param=0.2,
entropy_coef=0.01,
num_learning_epochs=8,
entropy_coef=0.005,
num_learning_epochs=5,
num_mini_batches=4,
learning_rate=1.0e-3,
schedule="adaptive",
......@@ -38,5 +38,4 @@ CARTPOLE_RSL_RL_PPO_CFG = RslRlOnPolicyRunnerCfg(
lam=0.95,
desired_kl=0.01,
max_grad_norm=1.0,
),
)
)
# Reference: https://github.com/DLR-RM/rl-baselines3-zoo/blob/master/hyperparams/ppo.yml#L32
seed: 42
# 512×500×16
n_timesteps: !!float 2e6
n_timesteps: !!float 1e6
policy: 'MlpPolicy'
n_steps: 16
batch_size: 8192
batch_size: 4096
gae_lambda: 0.95
gamma: 0.99
n_epochs: 8
ent_coef: 0.0
n_epochs: 20
ent_coef: 0.01
learning_rate: !!float 3e-4
clip_range: 0.2
clip_range: !!float 0.2
policy_kwargs: "dict(
log_std_init=-2,
ortho_init=False,
activation_fn=nn.ELU,
net_arch=[32, 32]
net_arch=[32, 32],
squash_output=False,
)"
target_kl: 0.008
vf_coef: 1.0
max_grad_norm: 1.0
# Uses VecNormalize class to normalize obs
normalize_input: True
# Uses VecNormalize class to normalize rew
normalize_value: True
clip_obs: 5
# parameters for the MDP
env:
# general
num_envs: 512
env_spacing: 4.0
episode_length: 500 # sim steps
# step parameters
control_frequency_inv: 2 # 60 Hz
# reset parameters
reset_dist: 3.0
# actions parameters
max_effort: 400.0
# parameters for setting up the scene
scene:
cartpole:
# articulation
solver_position_iteration_count: 4
solver_velocity_iteration_count: 1
sleep_threshold: 0.005
stabilization_threshold: 0.001
enable_self_collisions: False
# per-body
enable_gyroscopic_forces: True
max_depenetration_velocity: 100.0
# per-shape
contact_offset: 0.02
rest_offset: 0.001
# parameters for physics engine
sim:
dt: 0.0083 # 1/120 s
substeps: 1
gravity: [0.0, 0.0, -9.81]
enable_scene_query_support: False
use_gpu_pipeline: True
use_flatcache: True # deprecated from Isaac Sim 2023.1 onwards
use_fabric: True # used from Isaac Sim 2023.1 onwards
device: "cuda:0"
physx:
# Solver settings
solver_type: 1
use_gpu: True # set to False to run on CPU
bounce_threshold_velocity: 0.2
friction_offset_threshold: 0.04
friction_correlation_distance: 0.025
enable_stabilization: True
# GPU buffers
gpu_max_rigid_contact_count: 524288
gpu_max_rigid_patch_count: 81920
gpu_found_lost_pairs_capacity: 1024
gpu_found_lost_aggregate_pairs_capacity: 262144
gpu_total_aggregate_pairs_capacity: 1024
gpu_max_soft_body_contacts: 1048576
gpu_max_particle_contacts: 1048576
gpu_heap_capacity: 67108864
gpu_temp_buffer_capacity: 16777216
gpu_max_num_partitions: 8
# Copyright (c) 2022-2023, The ORBIT Project Developers.
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
from __future__ import annotations
import gymnasium as gym
import math
import torch
import omni.isaac.core.utils.nucleus as nucleus_utils
import omni.isaac.core.utils.prims as prim_utils
from omni.isaac.core.articulations import ArticulationView
import omni.isaac.orbit.compat.utils.kit as kit_utils
from omni.isaac.orbit_tasks.isaac_env import IsaacEnv, VecEnvIndices, VecEnvObs
from omni.isaac.orbit_tasks.isaac_env_cfg import EnvCfg, IsaacEnvCfg
class CartpoleEnv(IsaacEnv):
"""Environment for 2-D cartpole.
Reference:
https://github.com/openai/gym/blob/master/gym/envs/classic_control/cartpole.py
"""
def __init__(self, cfg: dict, **kwargs):
"""Initializes the environment.
Args:
cfg: The configuration dictionary.
kwargs: Additional keyword arguments. See IsaacEnv for more details.
"""
# copy configuration
self.cfg_dict = cfg.copy()
# configuration for the environment
isaac_cfg = IsaacEnvCfg(
env=EnvCfg(num_envs=self.cfg_dict["env"]["num_envs"], env_spacing=self.cfg_dict["env"]["env_spacing"])
)
isaac_cfg.sim.from_dict(self.cfg_dict["sim"])
# initialize the base class to setup the scene.
super().__init__(isaac_cfg, **kwargs)
# define views over instances
self.cartpoles = ArticulationView(prim_paths_expr=self.env_ns + "/.*/Cartpole", reset_xform_properties=False)
# play the simulator to activate physics handles
# note: this activates the physics simulation view that exposes TensorAPIs
self.sim.reset()
# initialize all the handles
self.cartpoles.initialize(self.sim.physics_sim_view)
# set the default state
self.cartpoles.post_reset()
# get quantities from scene we care about
self._cart_dof_idx = self.cartpoles.get_dof_index("cartJoint")
self._pole_dof_idx = self.cartpoles.get_dof_index("poleJoint")
# compute the observation space
self.observation_space = gym.spaces.Box(low=-math.inf, high=math.inf, shape=(4,))
# compute the action space
self.action_space = gym.spaces.Box(low=-1.0, high=1.0, shape=(1,))
# store maximum episode length
self.max_episode_length = self.cfg_dict["env"]["episode_length"]
"""
Implementation specifics.
"""
def _design_scene(self) -> list[str]:
# get nucleus assets path
assets_root_path = nucleus_utils.get_assets_root_path()
if assets_root_path is None:
raise RuntimeError(
"Unable to access the Nucleus server from Omniverse. For more information, please check:"
" https://docs.omniverse.nvidia.com/app_isaacsim/app_isaacsim/overview.html#omniverse-nucleus"
)
# ground plane
kit_utils.create_ground_plane("/World/defaultGroundPlane")
# robot
robot_usd_path = assets_root_path + "/Isaac/Robots/Cartpole/cartpole.usd"
prim_utils.create_prim(
prim_path=self.template_env_ns + "/Cartpole", usd_path=robot_usd_path, translation=(0.0, 0.0, 2.0)
)
# apply articulation settings
kit_utils.set_articulation_properties(
prim_path=self.template_env_ns + "/Cartpole",
solver_position_iteration_count=self.cfg_dict["scene"]["cartpole"]["solver_position_iteration_count"],
solver_velocity_iteration_count=self.cfg_dict["scene"]["cartpole"]["solver_velocity_iteration_count"],
sleep_threshold=self.cfg_dict["scene"]["cartpole"]["sleep_threshold"],
stabilization_threshold=self.cfg_dict["scene"]["cartpole"]["stabilization_threshold"],
enable_self_collisions=self.cfg_dict["scene"]["cartpole"]["enable_self_collisions"],
)
# apply rigid body settings
kit_utils.set_nested_rigid_body_properties(
prim_path=self.template_env_ns + "/Cartpole",
enable_gyroscopic_forces=self.cfg_dict["scene"]["cartpole"]["enable_gyroscopic_forces"],
max_depenetration_velocity=self.cfg_dict["scene"]["cartpole"]["max_depenetration_velocity"],
)
# apply collider properties
kit_utils.set_nested_collision_properties(
prim_path=self.template_env_ns + "/Cartpole",
contact_offset=self.cfg_dict["scene"]["cartpole"]["contact_offset"],
rest_offset=self.cfg_dict["scene"]["cartpole"]["rest_offset"],
)
# return global prims
return ["/World/defaultGroundPlane"]
def _reset_idx(self, env_ids: VecEnvIndices):
# get num envs to reset
num_resets = len(env_ids)
# randomize the MDP
# -- DOF position
dof_pos = torch.zeros((num_resets, self.cartpoles.num_dof), device=self.device)
dof_pos[:, self._cart_dof_idx] = 1.0 * (1.0 - 2.0 * torch.rand(num_resets, device=self.device))
dof_pos[:, self._pole_dof_idx] = 0.125 * math.pi * (1.0 - 2.0 * torch.rand(num_resets, device=self.device))
self.cartpoles.set_joint_positions(dof_pos, indices=env_ids)
# -- DOF velocity
dof_vel = torch.zeros((num_resets, self.cartpoles.num_dof), device=self.device)
dof_vel[:, self._cart_dof_idx] = 0.5 * (1.0 - 2.0 * torch.rand(num_resets, device=self.device))
dof_vel[:, self._pole_dof_idx] = 0.25 * math.pi * (1.0 - 2.0 * torch.rand(num_resets, device=self.device))
self.cartpoles.set_joint_velocities(dof_vel, indices=env_ids)
# -- MDP reset
self.reset_buf[env_ids] = 0
self.episode_length_buf[env_ids] = 0
def _step_impl(self, actions: torch.Tensor):
# reset environments that terminated
reset_env_ids = self.reset_buf.nonzero(as_tuple=False).squeeze(-1)
if len(reset_env_ids) > 0:
self._reset_idx(reset_env_ids)
# increment the number of steps
self.episode_length_buf += 1
# pre-step: set actions into buffer
self.actions = actions.clone().to(device=self.device)
dof_forces = torch.zeros(
(self.cartpoles.count, self.cartpoles.num_dof), dtype=torch.float32, device=self.device
)
dof_forces[:, self._cart_dof_idx] = self.cfg_dict["env"]["max_effort"] * self.actions[:, 0]
indices = torch.arange(self.cartpoles.count, dtype=torch.int32, device=self.device)
self.cartpoles.set_joint_efforts(dof_forces, indices=indices)
# perform physics stepping
for _ in range(self.cfg_dict["env"]["control_frequency_inv"]):
# step simulation
self.sim.step(render=self.enable_render)
# check that simulation is playing
if self.sim.is_stopped():
return
# post-step: compute MDP
self._compute_rewards()
self._check_termination()
# add information to extra if timeout occurred due to episode length
# Note: this is used by algorithms like PPO where time-outs are handled differently
# For more info: https://github.com/DLR-RM/stable-baselines3/issues/633
self.extras["time_outs"] = self.episode_length_buf >= self.cfg_dict["env"]["episode_length"]
def _get_observations(self) -> VecEnvObs:
# access buffers from simulator
dof_pos = self.cartpoles.get_joint_positions(clone=False)
dof_vel = self.cartpoles.get_joint_velocities(clone=False)
# concatenate and return
obs_buf = torch.cat([dof_pos, dof_vel], dim=-1)
return {"policy": obs_buf}
"""
Helper functions.
"""
def _compute_rewards(self) -> None:
# access buffers from simulator
dof_pos = self.cartpoles.get_joint_positions(clone=False)
dof_vel = self.cartpoles.get_joint_velocities(clone=False)
# extract values from buffer
cart_pos = dof_pos[:, self._cart_dof_idx]
pole_pos = dof_pos[:, self._pole_dof_idx]
cart_vel = dof_vel[:, self._cart_dof_idx]
pole_vel = dof_vel[:, self._pole_dof_idx]
# compute reward
reward = 1.0 - pole_pos * pole_pos - 0.01 * torch.abs(cart_vel) - 0.005 * torch.abs(pole_vel)
reward = torch.where(
torch.abs(cart_pos) > self.cfg_dict["env"]["reset_dist"], torch.ones_like(reward) * -2.0, reward
)
reward = torch.where(torch.abs(pole_pos) > math.pi / 2, torch.ones_like(reward) * -2.0, reward)
# set reward into buffer
self.reward_buf[:] = reward
def _check_termination(self) -> None:
# access buffers from simulator
dof_pos = self.cartpoles.get_joint_positions(clone=False)
# extract values from buffer
cart_pos = dof_pos[:, self._cart_dof_idx]
pole_pos = dof_pos[:, self._pole_dof_idx]
# compute resets
# -- cart moved towards the edges
resets = torch.where(torch.abs(cart_pos) > self.cfg_dict["env"]["reset_dist"], 1, 0)
# -- pole fell down
resets = torch.where(torch.abs(pole_pos) > math.pi / 2, 1, resets)
# -- episode length
resets = torch.where(self.episode_length_buf >= self.max_episode_length, 1, resets)
# set resets into buffer
self.reset_buf[:] = resets
# Copyright (c) 2022-2023, The ORBIT Project Developers.
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
import math
from omni.isaac.orbit_assets import ORBIT_ASSETS_DATA_DIR
import omni.isaac.orbit.sim as sim_utils
from omni.isaac.orbit.actuators import ImplicitActuatorCfg
from omni.isaac.orbit.assets import ArticulationCfg, AssetBaseCfg
from omni.isaac.orbit.command_generators.command_generator_cfg import NullCommandGeneratorCfg
from omni.isaac.orbit.envs import RLTaskEnvCfg
from omni.isaac.orbit.managers import ObservationGroupCfg as ObsGroup
from omni.isaac.orbit.managers import ObservationTermCfg as ObsTerm
from omni.isaac.orbit.managers import RandomizationTermCfg as RandTerm
from omni.isaac.orbit.managers import RewardTermCfg as RewTerm
from omni.isaac.orbit.managers import SceneEntityCfg
from omni.isaac.orbit.managers import TerminationTermCfg as DoneTerm
from omni.isaac.orbit.scene import InteractiveSceneCfg
from omni.isaac.orbit.utils import configclass
import omni.isaac.orbit_tasks.classic.cartpole.mdp as mdp
##
# Scene definition
##
@configclass
class CartpoleSceneCfg(InteractiveSceneCfg):
"""Configuration for the cartpole scene."""
# ground plane
ground = AssetBaseCfg(
prim_path="/World/ground",
spawn=sim_utils.GroundPlaneCfg(size=(100.0, 100.0)),
)
# robots
robot = ArticulationCfg(
prim_path="{ENV_REGEX_NS}/Robot",
spawn=sim_utils.UsdFileCfg(
usd_path=f"{ORBIT_ASSETS_DATA_DIR}/Robots/Classic/Cartpole/cartpole.usd",
rigid_props=sim_utils.RigidBodyPropertiesCfg(
max_linear_velocity=1000.0,
max_angular_velocity=1000.0,
max_depenetration_velocity=100.0,
enable_gyroscopic_forces=True,
),
articulation_props=sim_utils.ArticulationRootPropertiesCfg(
enabled_self_collisions=False,
solver_position_iteration_count=4,
solver_velocity_iteration_count=0,
sleep_threshold=0.005,
stabilization_threshold=0.001,
),
),
init_state=ArticulationCfg.InitialStateCfg(
pos=(0.0, 0.0, 2.0), joint_pos={"slider_to_cart": 0.0, "cart_to_pole": 0.0}
),
actuators={
"cart_actuator": ImplicitActuatorCfg(
joint_names_expr=["slider_to_cart"],
effort_limit=400.0,
velocity_limit=100.0,
stiffness=0.0,
damping=10.0,
),
"pole_actuator": ImplicitActuatorCfg(
joint_names_expr=["cart_to_pole"], effort_limit=400.0, velocity_limit=100.0, stiffness=0.0, damping=0.0
),
},
)
# lights
dome_light = AssetBaseCfg(
prim_path="/World/DomeLight",
spawn=sim_utils.DomeLightCfg(color=(0.9, 0.9, 0.9), intensity=500.0),
)
distant_light = AssetBaseCfg(
prim_path="/World/DistantLight",
spawn=sim_utils.DistantLightCfg(color=(0.9, 0.9, 0.9), intensity=2500.0),
init_state=AssetBaseCfg.InitialStateCfg(rot=(0.738, 0.477, 0.477, 0.0)),
)
##
# MDP settings
##
@configclass
class ActionsCfg:
"""Action specifications for the MDP."""
joint_effort = mdp.JointEffortActionCfg(asset_name="robot", joint_names=["slider_to_cart"], scale=100.0)
@configclass
class ObservationsCfg:
"""Observation specifications for the MDP."""
@configclass
class PolicyCfg(ObsGroup):
"""Observations for policy group."""
# observation terms (order preserved)
joint_pos_rel = ObsTerm(func=mdp.joint_pos_rel)
joint_vel_rel = ObsTerm(func=mdp.joint_vel_rel)
def __post_init__(self) -> None:
self.enable_corruption = False
self.concatenate_terms = True
# observation groups
policy: PolicyCfg = PolicyCfg()
@configclass
class RandomizationCfg:
"""Configuration for randomization."""
# reset
reset_cart_position = RandTerm(
func=mdp.reset_joints_by_offset,
mode="reset",
params={
"asset_cfg": SceneEntityCfg("robot", joint_names=["slider_to_cart"]),
"position_range": (-1.0, 1.0),
"velocity_range": (-0.5, 0.5),
},
)
reset_pole_position = RandTerm(
func=mdp.reset_joints_by_offset,
mode="reset",
params={
"asset_cfg": SceneEntityCfg("robot", joint_names=["cart_to_pole"]),
"position_range": (-0.25 * math.pi, 0.25 * math.pi),
"velocity_range": (-0.25 * math.pi, 0.25 * math.pi),
},
)
@configclass
class RewardsCfg:
"""Reward terms for the MDP."""
# (1) Constant running reward
alive = RewTerm(func=mdp.alive_bonus, weight=1.0)
# (2) Failure penalty
terminating = RewTerm(func=mdp.termination_penalty, weight=-2.0)
# (3) Primary task: keep pole upright
pole_pos = RewTerm(
func=mdp.joint_pos_target_l2,
weight=-1.0,
params={"asset_cfg": SceneEntityCfg("robot", joint_names=["cart_to_pole"]), "target": 0.0},
)
# (4) Shaping tasks: lower cart velocity
cart_vel = RewTerm(
func=mdp.joint_vel_l1,
weight=-0.01,
params={"asset_cfg": SceneEntityCfg("robot", joint_names=["slider_to_cart"])},
)
# (5) Shaping tasks: lower pole angular velocity
pole_vel = RewTerm(
func=mdp.joint_vel_l1,
weight=-0.005,
params={"asset_cfg": SceneEntityCfg("robot", joint_names=["cart_to_pole"])},
)
@configclass
class TerminationsCfg:
"""Termination terms for the MDP."""
# (1) Time out
time_out = DoneTerm(func=mdp.time_out, time_out=True)
# (2) Cart out of bounds
cart_out_of_bounds = DoneTerm(
func=mdp.joint_pos_manual_limit,
params={"asset_cfg": SceneEntityCfg("robot", joint_names=["slider_to_cart"]), "bounds": (-3.0, 3.0)},
)
@configclass
class CurriculumCfg:
"""Configuration for the curriculum."""
pass
##
# Environment configuration
##
@configclass
class CartpoleEnvCfg(RLTaskEnvCfg):
"""Configuration for the locomotion velocity-tracking environment."""
# Scene settings
scene: CartpoleSceneCfg = CartpoleSceneCfg(num_envs=4096, env_spacing=4.0, replicate_physics=True)
# Basic settings
observations: ObservationsCfg = ObservationsCfg()
actions: ActionsCfg = ActionsCfg()
randomization: RandomizationCfg = RandomizationCfg()
# MDP settings
curriculum: CurriculumCfg = CurriculumCfg()
rewards: RewardsCfg = RewardsCfg()
terminations: TerminationsCfg = TerminationsCfg()
# No command generator
commands: NullCommandGeneratorCfg = NullCommandGeneratorCfg()
def __post_init__(self) -> None:
"""Post initialization."""
# general settings
self.decimation = 2
self.episode_length_s = 5
# viewer settings
self.viewer.eye = (8.0, 0.0, 5.0)
# simulation settings
self.sim.dt = 1 / 120
# Copyright (c) 2022-2023, The ORBIT Project Developers.
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
"""This sub-module contains the functions that are specific to the cartpole environments."""
from omni.isaac.orbit.envs.mdp import * # noqa: F401, F403
from .rewards import * # noqa: F401, F403
# Copyright (c) 2022-2023, The ORBIT Project Developers.
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
from __future__ import annotations
import torch
from typing import TYPE_CHECKING
from omni.isaac.orbit.assets import Articulation
from omni.isaac.orbit.managers import SceneEntityCfg
from omni.isaac.orbit.utils.math import wrap_to_pi
if TYPE_CHECKING:
from omni.isaac.orbit.envs import RLTaskEnv
def joint_pos_target_l2(env: RLTaskEnv, target: float, asset_cfg: SceneEntityCfg) -> torch.Tensor:
"""Penalize joint position deviation from a target value."""
# extract the used quantities (to enable type-hinting)
asset: Articulation = env.scene[asset_cfg.name]
# wrap the joint positions to (-pi, pi)
joint_pos = wrap_to_pi(asset.data.joint_pos[:, asset_cfg.joint_ids])
# compute the reward
return torch.sum(torch.square(joint_pos - target), dim=1)
......@@ -7,21 +7,21 @@
Humanoid locomotion environment (similar to OpenAI Gym Humanoid-v2).
"""
import gymnasium as gym
# import gymnasium as gym
from . import agents
# from . import agents
##
# Register Gym environments.
##
gym.register(
id="Isaac-Humanoid-v0",
entry_point="omni.isaac.orbit.envs:RLTaskEnv",
kwargs={
"env_cfg_entry_point": f"{__name__}:ant_env_cfg.yaml",
"rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_ppo_cfg.yaml",
"skrl_cfg_entry_point": f"{agents.__name__}:skrl_ppo_cfg.yaml",
"sb3_cfg_entry_point": f"{agents.__name__}:sb3_ppo_cfg.yaml",
},
)
# gym.register(
# id="Isaac-Humanoid-v0",
# entry_point="omni.isaac.orbit.envs:RLTaskEnv",
# kwargs={
# "env_cfg_entry_point": f"{__name__}:ant_env_cfg.yaml",
# "rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_ppo_cfg.yaml",
# "skrl_cfg_entry_point": f"{agents.__name__}:skrl_ppo_cfg.yaml",
# "sb3_cfg_entry_point": f"{agents.__name__}:sb3_ppo_cfg.yaml",
# },
# )
......@@ -157,7 +157,7 @@ class RandomizationCfg:
)
reset_base = RandTerm(
func=mdp.reset_root_state,
func=mdp.reset_root_state_uniform,
mode="reset",
params={
"pose_range": {"x": (-0.5, 0.5), "y": (-0.5, 0.5), "yaw": (-3.14, 3.14)},
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment