Unverified Commit 447aa318 authored by Mayank Mittal's avatar Mayank Mittal Committed by GitHub

Makes BaseEnv and RLEnv consistent with desired definitions (#197)

# Description

As discussed in the past, the `BaseEnv` should only contain an
observation-action interface, while `RlEnv` should contain also the
task-specific interface. Based on this, the MR introduces the following
changes:

* Moves randomization manager to the `BaseEnv` class with the default
settings to reset the scene to the defaults specified in the
configurations of assets.
* Moves command generator to `RlEnv` class
* Adds a `NullCommandGenerator` for no command environments: This is
easier to work with than having checks for None.

## Type of change

- Breaking change (fix or feature that would cause existing
functionality to not work as expected)
- This change requires a documentation update

## Checklist

- [x] I have run the [`pre-commit` checks](https://pre-commit.com/) with
`./orbit.sh --format`
- [x] I have made corresponding changes to the documentation
- [x] My changes generate no new warnings
- [ ] I have added tests that prove my fix is effective or that my
feature works
- [x] I have updated the changelog and the corresponding version in the
extension's `config/extension.toml` file

---------
Signed-off-by: 's avatarMayank Mittal <12863862+Mayankm96@users.noreply.github.com>
Co-authored-by: 's avatarjsmith-bdai <142246516+jsmith-bdai@users.noreply.github.com>
parent 97d684c3
[package]
# Note: Semantic Versioning is used: https://semver.org/
version = "0.9.21"
version = "0.9.22"
# Description
title = "ORBIT framework for Robot Learning"
......
Changelog
---------
0.9.22 (2023-10-26)
~~~~~~~~~~~~~~~~~~~
Added
^^^^^
* Added a :class:`omni.isaac.orbit.command_generators.NullCommandGenerator` class for no command environments.
This is easier to work with than having checks for :obj:`None` in the command generator.
Fixed
^^^^^
* Moved the randomization manager to the :class:`omni.isaac.orbit.envs.BaseEnv` class with the default
settings to reset the scene to the defaults specified in the configurations of assets.
* Moved command generator to the :class:`omni.isaac.orbit.envs.RlEnv` class to have all task-specification
related classes in the same place.
0.9.21 (2023-10-26)
~~~~~~~~~~~~~~~~~~~
......
......@@ -21,15 +21,20 @@ from .command_generator_base import CommandGeneratorBase
from .command_generator_cfg import (
CommandGeneratorBaseCfg,
NormalVelocityCommandGeneratorCfg,
NullCommandGeneratorCfg,
TerrainBasedPositionCommandGeneratorCfg,
UniformVelocityCommandGeneratorCfg,
)
from .null_command_generator import NullCommandGenerator
from .position_command_generator import TerrainBasedPositionCommandGenerator
from .velocity_command_generator import NormalVelocityCommandGenerator, UniformVelocityCommandGenerator
__all__ = [
"CommandGeneratorBase",
"CommandGeneratorBaseCfg",
# null command generator
"NullCommandGenerator",
"NullCommandGeneratorCfg",
# velocity command generators
"UniformVelocityCommandGenerator",
"UniformVelocityCommandGeneratorCfg",
......
......@@ -5,11 +5,13 @@
from __future__ import annotations
import math
from dataclasses import MISSING
from omni.isaac.orbit.utils import configclass
from .command_generator_base import CommandGeneratorBase
from .null_command_generator import NullCommandGenerator
from .position_command_generator import TerrainBasedPositionCommandGenerator
from .velocity_command_generator import NormalVelocityCommandGenerator, UniformVelocityCommandGenerator
......@@ -34,6 +36,23 @@ class CommandGeneratorBaseCfg:
"""Whether to visualize debug information. Defaults to False."""
"""
Null-command generator.
"""
@configclass
class NullCommandGeneratorCfg(CommandGeneratorBaseCfg):
"""Configuration for the null command generator."""
class_type: type = NullCommandGenerator
def __post_init__(self):
"""Post initialization."""
# set the resampling time range to infinity to avoid resampling
self.resampling_time_range = (math.inf, math.inf)
"""
Locomotion-specific command generators.
"""
......
# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES, ETH Zurich, and University of Toronto
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
"""Sub-module containing command generator that does nothing."""
from __future__ import annotations
from typing import TYPE_CHECKING, Sequence
from omni.isaac.orbit.command_generators.command_generator_base import CommandGeneratorBase
if TYPE_CHECKING:
from .command_generator_cfg import NullCommandGeneratorCfg
class NullCommandGenerator(CommandGeneratorBase):
"""Command generator that does nothing.
This command generator does not generate any commands. It is used for environments that do not
require any commands.
"""
cfg: NullCommandGeneratorCfg
"""Configuration for the command generator."""
"""
Properties
"""
@property
def command(self):
"""Null command.
Raises:
RuntimeError: No command is generated. Always raises this error.
"""
raise RuntimeError("NullCommandGenerator does not generate any commands.")
"""
Implementation specific functions.
"""
def _resample_command(self, env_ids: Sequence[int]):
pass
def _update_command(self):
pass
def _update_metrics(self):
pass
......@@ -5,16 +5,18 @@
from .base_env import BaseEnv
from .base_env_cfg import BaseEnvCfg, ViewerCfg
from .rl_env import RLEnv, VecEnvIndices, VecEnvObs, VecEnvStepReturn
from .rl_env import RLEnv, VecEnvObs, VecEnvStepReturn
from .rl_env_cfg import RLEnvCfg
__all__ = [
# base
"BaseEnv",
"BaseEnvCfg",
"ViewerCfg",
# rl
"RLEnv",
"RLEnvCfg",
"VecEnvIndices",
# env type variables
"VecEnvObs",
"VecEnvStepReturn",
]
......@@ -7,8 +7,9 @@ from __future__ import annotations
import builtins
from omni.isaac.orbit.command_generators import CommandGeneratorBase
from omni.isaac.orbit.managers import ActionManager, ObservationManager
import omni.isaac.core.utils.torch as torch_utils
from omni.isaac.orbit.managers import ActionManager, ObservationManager, RandomizationManager
from omni.isaac.orbit.scene import InteractiveScene
from omni.isaac.orbit.sim import SimulationContext
from omni.isaac.orbit.utils.timer import Timer
......@@ -36,10 +37,10 @@ class BaseEnv:
raw actions at different levels of abstraction. For example, in case of a robotic arm, the raw actions
can be joint torques, joint positions, or end-effector poses. Similarly for a mobile base, it can be
the joint torques, or the desired velocity of the floating base.
* **Command Generator**: The command generator that generates the goal commands for the robot. These
commands are used by the observation manager to generate the observations. For example, in case of a
robotic arm, the goal commands can be the object to be grasped, or the desired end-effector pose. For
a mobile base, it can be the goal position and orientation of the base.
* **Randomization Manager**: The randomization manager that randomizes different elements in the scene.
This includes resetting the scene to a default state or randomize the scene at different intervals
of time. The randomization manager can be configured to randomize different elements of the scene
such as the masses of objects, friction coefficients, or apply random pushes to the robot.
The environment provides a unified interface for interacting with the simulation. However, it does not
include task-specific quantities such as the reward function, or the termination conditions. These
......@@ -135,7 +136,7 @@ class BaseEnv:
return self.sim.device
"""
Operations.
Operations - Setup.
"""
def load_managers(self):
......@@ -145,16 +146,34 @@ class BaseEnv:
This must happen after the simulator is reset, i.e. after the first call to :meth:`self.sim.reset`.
"""
# prepare the managers
# note: this order is important since observation manager needs to know the command and action managers
# -- command manager
self.command_manager: CommandGeneratorBase = self.cfg.commands.class_type(self.cfg.commands, self)
print("[INFO] Command Manager: ", self.command_manager)
# -- action manager
self.action_manager = ActionManager(self.cfg.actions, self)
print("[INFO] Action Manager: ", self.action_manager)
# -- observation manager
self.observation_manager = ObservationManager(self.cfg.observations, self)
print("[INFO] Observation Manager:", self.observation_manager)
# -- randomization manager
self.randomization_manager = RandomizationManager(self.cfg.randomization, self)
print("[INFO] Randomization Manager: ", self.randomization_manager)
"""
Operations - MDP.
"""
@staticmethod
def seed(seed: int = -1) -> int:
"""Set the seed for the environment.
Args:
seed: The seed for random generator. Defaults to -1.
Returns:
The seed used for random generator.
"""
import omni.replicator.core as rep
rep.set_global_seed(seed)
return torch_utils.set_seed(seed)
def close(self):
"""Cleanup for the environment."""
......
......@@ -12,7 +12,8 @@ configuring the environment instances, viewer settings, and simulation parameter
from dataclasses import MISSING
from typing import Tuple
from omni.isaac.orbit.command_generators import CommandGeneratorBaseCfg
import omni.isaac.orbit.envs.mdp as mdp
from omni.isaac.orbit.managers import RandomizationTermCfg as RandTerm
from omni.isaac.orbit.scene import InteractiveSceneCfg
from omni.isaac.orbit.sim import SimulationCfg
from omni.isaac.orbit.utils import configclass
......@@ -38,6 +39,17 @@ class ViewerCfg:
"""
@configclass
class DefaultRandomizationManagerCfg:
"""Configuration of the default randomization manager.
This manager is used to reset the scene to a default state. The default state is specified
by the scene configuration.
"""
reset_scene_to_default = RandTerm(func=mdp.reset_scene_to_default, mode="reset")
@configclass
class BaseEnvCfg:
"""Base configuration of the environment."""
......@@ -59,5 +71,6 @@ class BaseEnvCfg:
"""Observation space settings."""
actions: object = MISSING
"""Action space settings."""
commands: CommandGeneratorBaseCfg = MISSING
"""Command generator settings."""
randomization: object = DefaultRandomizationManagerCfg()
"""Randomization settings. Defaults to the default randomization manager, which resets
the scene to its default state."""
......@@ -22,11 +22,11 @@ from omni.isaac.orbit.managers import SceneEntityCfg
from omni.isaac.orbit.utils.math import quat_from_euler_xyz, sample_uniform
if TYPE_CHECKING:
from omni.isaac.orbit.envs.rl_env import RLEnv
from omni.isaac.orbit.envs.base_env import BaseEnv
def randomize_rigid_body_material(
env: RLEnv,
env: BaseEnv,
env_ids: torch.Tensor | None,
static_friction_range: tuple[float, float],
dynamic_friction_range: tuple[float, float],
......@@ -79,7 +79,9 @@ def randomize_rigid_body_material(
asset.body_physx_view.set_material_properties(materials, indices)
def add_body_mass(env: RLEnv, env_ids: torch.Tensor | None, mass_range: tuple[float, float], asset_cfg: SceneEntityCfg):
def add_body_mass(
env: BaseEnv, env_ids: torch.Tensor | None, mass_range: tuple[float, float], asset_cfg: SceneEntityCfg
):
"""Randomize the mass of the bodies by adding a random value sampled from the given range.
.. tip::
......@@ -107,7 +109,7 @@ def add_body_mass(env: RLEnv, env_ids: torch.Tensor | None, mass_range: tuple[fl
def apply_external_force_torque(
env: RLEnv,
env: BaseEnv,
env_ids: torch.Tensor,
force_range: tuple[float, float],
torque_range: tuple[float, float],
......@@ -137,7 +139,7 @@ def apply_external_force_torque(
def push_by_setting_velocity(
env: RLEnv,
env: BaseEnv,
env_ids: torch.Tensor,
velocity_range: dict[str, tuple[float, float]],
asset_cfg: SceneEntityCfg = SceneEntityCfg("robot"),
......@@ -168,7 +170,7 @@ def push_by_setting_velocity(
def reset_root_state(
env: RLEnv,
env: BaseEnv,
env_ids: torch.Tensor,
pose_range: dict[str, tuple[float, float]],
velocity_range: dict[str, tuple[float, float]],
......@@ -219,7 +221,7 @@ def reset_root_state(
def reset_joints_by_scale(
env: RLEnv,
env: BaseEnv,
env_ids: torch.Tensor,
position_range: tuple[float, float],
velocity_range: tuple[float, float],
......@@ -241,3 +243,21 @@ def reset_joints_by_scale(
# set into the physics simulation
asset.write_joint_state_to_sim(joint_pos, joint_vel, env_ids=env_ids)
def reset_scene_to_default(env: BaseEnv, env_ids: torch.Tensor):
"""Reset the scene to the default state specified in the scene configuration."""
# root states
for rigid_object in env.scene.rigid_objects.values() + env.scene.articulations.values():
# obtain default and deal with the offset for env origins
default_root_state = rigid_object.data.default_root_state_w[env_ids].clone()
default_root_state[:, 0:3] += env.scene.env_origins[env_ids]
# set into the physics simulation
rigid_object.write_root_state_to_sim(default_root_state, env_ids=env_ids)
# joint states
for articulation_asset in env.scene.articulations.values():
# obtain default joint positions
default_joint_pos = articulation_asset.data.default_joint_pos[env_ids].clone()
default_joint_vel = articulation_asset.data.default_joint_vel[env_ids].clone()
# set into the physics simulation
articulation_asset.write_joint_state_to_sim(default_joint_pos, default_joint_vel, env_ids=env_ids)
......@@ -10,21 +10,18 @@ import gym
import math
import numpy as np
import torch
from typing import Any, ClassVar, Dict, Iterable, Tuple, Union
from typing import Any, ClassVar, Dict, Sequence, Tuple, Union
import omni.isaac.core.utils.torch as torch_utils
import omni.usd
from omni.isaac.orbit.managers import CurriculumManager, RandomizationManager, RewardManager, TerminationManager
from omni.isaac.orbit.command_generators import CommandGeneratorBase
from omni.isaac.orbit.managers import CurriculumManager, RewardManager, TerminationManager
from .base_env import BaseEnv
from .rl_env_cfg import RLEnvCfg
VecEnvIndices = Union[int, Iterable[int]]
"""Indices of the sub-environments. Used when we want to access one or more environments."""
VecEnvObs = Dict[str, Union[torch.Tensor, Dict[str, torch.Tensor]]]
"""Observation returned by the environment. It contains the observation for each sub-environment.
"""Observation returned by the environment.
The observations are stored in a dictionary. The keys are the group to which the observations belong.
This is useful for various learning setups beyond vanilla reinforcement learning, such as asymmetric
......@@ -43,11 +40,15 @@ Note:
For included frameworks (RSL-RL, RL-Games, skrl), the observations must have the key "policy". In case,
the key "critic" is also present, then the critic observations are taken from the "critic" group.
Otherwise, they are the same as the "policy" group.
"""
VecEnvStepReturn = Tuple[VecEnvObs, torch.Tensor, torch.Tensor, Dict]
"""The environment signals processed at the end of each step. It contains the observation, reward, termination
signal and additional information for each sub-environment."""
"""The environment signals processed at the end of each step.
It contains the observation, reward, termination signal and additional information for each sub-environment.
"""
class RLEnv(BaseEnv, gym.Env):
......@@ -106,7 +107,8 @@ class RLEnv(BaseEnv, gym.Env):
self.action_space = gym.spaces.Box(low=-np.inf, high=np.inf, shape=(action_dim,))
# perform randomization at the start of the simulation
self.randomization_manager.randomize(mode="startup")
if "startup" in self.randomization_manager.available_modes:
self.randomization_manager.randomize(mode="startup")
# extend UI elements
# we need to do this here after all the managers are initialized
# this is because they dictate the sensors and commands right now
......@@ -136,7 +138,11 @@ class RLEnv(BaseEnv, gym.Env):
"""
def load_managers(self):
# call the parent class to load the managers for observations, actions and commands.
# note: this order is important since observation manager needs to know the command and action managers
# -- command manager
self.command_manager: CommandGeneratorBase = self.cfg.commands.class_type(self.cfg.commands, self)
print("[INFO] Command Manager: ", self.command_manager)
# call the parent class to load the managers for observations and actions.
super().load_managers()
# prepare the managers
# -- reward manager
......@@ -148,29 +154,11 @@ class RLEnv(BaseEnv, gym.Env):
# -- curriculum manager
self.curriculum_manager = CurriculumManager(self.cfg.curriculum, self)
print("[INFO] Curriculum Manager: ", self.curriculum_manager)
# -- randomization manager
self.randomization_manager = RandomizationManager(self.cfg.randomization, self)
print("[INFO] Randomization Manager: ", self.randomization_manager)
"""
Operations - MDP
"""
@staticmethod
def seed(seed: int = -1) -> int:
"""Set the seed for the environment.
Args:
seed: The seed for random generator. Defaults to -1.
Returns:
The seed used for random generator.
"""
import omni.replicator.core as rep
rep.set_global_seed(seed)
return torch_utils.set_seed(seed)
def reset(self) -> VecEnvObs:
"""Resets all the environments and returns observations.
......@@ -243,7 +231,8 @@ class RLEnv(BaseEnv, gym.Env):
# -- update command
self.command_manager.compute(dt=self.step_dt)
# -- step interval randomization
self.randomization_manager.randomize(mode="interval", dt=self.step_dt)
if "interval" in self.randomization_manager.available_modes:
self.randomization_manager.randomize(mode="interval", dt=self.step_dt)
# return observations, rewards, resets and extras
return self.observation_manager.compute(), self.reward_buf, self.reset_buf, self.extras
......@@ -314,6 +303,7 @@ class RLEnv(BaseEnv, gym.Env):
if self._orbit_window is not None:
self._orbit_window.visible = False
self._orbit_window.destroy()
self._orbit_window = None
# update closing status
super().close()
......@@ -321,7 +311,7 @@ class RLEnv(BaseEnv, gym.Env):
Implementation specifics.
"""
def _reset_idx(self, env_ids: VecEnvIndices):
def _reset_idx(self, env_ids: Sequence[int]):
"""Reset environments based on specified indices.
Args:
......@@ -329,10 +319,11 @@ class RLEnv(BaseEnv, gym.Env):
"""
# update the curriculum for environments that need a reset
self.curriculum_manager.compute(env_ids=env_ids)
# randomize the MDP for environments that need a reset
self.randomization_manager.randomize(env_ids=env_ids, mode="reset")
# reset the internal buffers of the scene elements
self.scene.reset(env_ids)
# randomize the MDP for environments that need a reset
if "reset" in self.randomization_manager.available_modes:
self.randomization_manager.randomize(env_ids=env_ids, mode="reset")
# iterate over all managers and reset them
# this returns a dictionary of information which is stored in the extras
......
......@@ -7,6 +7,7 @@ from __future__ import annotations
from dataclasses import MISSING
from omni.isaac.orbit.command_generators import CommandGeneratorBaseCfg
from omni.isaac.orbit.utils import configclass
from .base_env_cfg import BaseEnvCfg
......@@ -25,7 +26,7 @@ class RLEnvCfg(BaseEnvCfg):
"""Reward settings."""
terminations: object = MISSING
"""Termination settings."""
randomization: object = MISSING
"""Randomization settings."""
curriculum: object = MISSING
"""Curriculum settings."""
commands: CommandGeneratorBaseCfg = MISSING
"""Command generator settings."""
......@@ -94,9 +94,17 @@ class RandomizationManager(ManagerBase):
@property
def active_terms(self) -> dict[str, list[str]]:
"""Name of active randomization terms."""
"""Name of active randomization terms.
The keys are the modes of randomization and the values are the names of the randomization terms.
"""
return self._mode_term_names
@property
def available_modes(self) -> list[str]:
"""Modes of randomization."""
return list(self._mode_term_names.keys())
"""
Operations.
"""
......
......@@ -69,9 +69,7 @@ class TerrainSceneCfg(InteractiveSceneCfg):
debug_vis=True,
mesh_prim_paths=["/World/ground"],
)
contact_forces = ContactSensorCfg(
prim_path="{ENV_REGEX_NS}/Robot/.*", history_length=3, track_air_time=True, debug_vis=True
)
contact_forces = ContactSensorCfg(prim_path="{ENV_REGEX_NS}/Robot/.*", history_length=3, track_air_time=True)
# lights
light = AssetBaseCfg(
prim_path="/World/light",
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment