Unverified Commit 3692aced authored by ooctipus's avatar ooctipus Committed by GitHub

Supports sb3 wrapper to pre-process env's image obs-space to trigger sb3...

Supports sb3 wrapper to pre-process env's image obs-space to trigger sb3 natively supported cnn creation pipeline (#2812)

# Description

This PR modifies the SB3 wrapper so that it uses SB3's natively
supported encoder creation when the composite observation space is
properly defined.

SB3's automatic CNN encoding applies when:
1. the observation space of that term has a GrayScale, RGB, or RGBD
image shape;
2. if agent_cfg has the normalized flag, the data is expected to have the
channel as the first dimension and to be pre-normalized;
3. if agent_cfg does not have the normalized flag, the data is expected to
have a space with min=0, max=255, dtype=uint8.

This PR makes sure the SB3 wrapper adjusts each environment image term to
meet either condition 2 or condition 3 by looking at the space's min and
max, so that the SB3 creation pipeline is applied automatically.


## Type of change

<!-- As you go through the list, delete the ones that are not
applicable. -->

- New feature (non-breaking change which adds functionality)

## Screenshots

Please attach before and after screenshots of the change if applicable.

<!--
Example:

| Before | After |
| ------ | ----- |
| _gif/png before_ | _gif/png after_ |

To upload images to a PR -- simply drag and drop an image while in edit
mode and it should upload the image directly. You can then paste that
source into the above before/after sections.
-->

## Checklist

- [x] I have run the [`pre-commit` checks](https://pre-commit.com/) with
`./isaaclab.sh --format`
- [ ] I have made corresponding changes to the documentation
- [x] My changes generate no new warnings
- [ ] I have added tests that prove my fix is effective or that my
feature works
- [x] I have updated the changelog and the corresponding version in the
extension's `config/extension.toml` file
- [x] I have added my name to the `CONTRIBUTORS.md` or my name already
exists there

<!--
As you go through the checklist above, you can mark something as done by
putting an x character in it

For example,
- [x] I have done this task
- [ ] I have not done this task
-->
Co-authored-by: Kelly Guo <kellyg@nvidia.com>
parent d02d3b8a
[package]
# Note: Semantic Versioning is used: https://semver.org/
version = "0.1.7"
version = "0.1.8"
# Description
title = "Isaac Lab RL"
......
Changelog
---------
0.1.8 (2025-06-29)
~~~~~~~~~~~~~~~~~~
Added
^^^^^
* Added support in the SB3 VecEnv wrapper for properly configuring composite observation spaces so that the CNN
creation pipelines natively supported by SB3 are triggered automatically.
0.1.7 (2025-06-30)
~~~~~~~~~~~~~~~~~~
......
......@@ -25,6 +25,7 @@ import torch.nn as nn # noqa: F401
import warnings
from typing import Any
from stable_baselines3.common.preprocessing import is_image_space, is_image_space_channels_first
from stable_baselines3.common.utils import constant_fn
from stable_baselines3.common.vec_env.base_vec_env import VecEnv, VecEnvObs, VecEnvStepReturn
......@@ -156,17 +157,8 @@ class Sb3VecEnvWrapper(VecEnv):
self.num_envs = self.unwrapped.num_envs
self.sim_device = self.unwrapped.device
self.render_mode = self.unwrapped.render_mode
# obtain gym spaces
# note: stable-baselines3 does not like when we have unbounded action space so
# we set it to some high value here. Maybe this is not general but something to think about.
observation_space = self.unwrapped.single_observation_space["policy"]
action_space = self.unwrapped.single_action_space
if isinstance(action_space, gym.spaces.Box) and not action_space.is_bounded("both"):
action_space = gym.spaces.Box(low=-100, high=100, shape=action_space.shape)
# initialize vec-env
VecEnv.__init__(self, self.num_envs, observation_space, action_space)
self.observation_processors = {}
self._process_spaces()
# add buffer for logging episodic information
self._ep_rew_buf = np.zeros(self.num_envs)
self._ep_len_buf = np.zeros(self.num_envs)
......@@ -303,6 +295,58 @@ class Sb3VecEnvWrapper(VecEnv):
Helper functions.
"""
def _process_spaces(self):
    """Build the observation/action spaces and initialize the ``VecEnv`` base class.

    When the ``"policy"`` observation space is a ``gym.spaces.Dict``, every term that
    passes SB3's ``is_image_space`` shape/channel check is adjusted as follows:

    * Bounds are exactly ``[-1, 1]``: the term is treated as already normalized. If it
      is not channels-first, a permute processor is registered and the term's space is
      replaced by a channels-first ``Box(-1, 1, (c, h, w))`` of the same dtype.
    * Otherwise the bounds must be exactly ``[0, 255]``. If the dtype is not ``uint8``,
      a cast-to-``uint8`` processor is registered and the term's space is replaced by
      ``Box(0, 255, shape, uint8)``.

    Registered processors are stored in ``self.observation_processors`` under the
    term's key. A ``Box`` action space that is not bounded on both sides is replaced
    by ``Box(-100, 100)`` of the same shape.

    Raises:
        ValueError: If an image-shaped term has bounds that are neither ``[-1, 1]``
            nor ``[0, 255]``.
    """
    # local import: only needed for the annotation below
    from collections.abc import Callable

    # process observation space
    observation_space = self.unwrapped.single_observation_space["policy"]
    if isinstance(observation_space, gym.spaces.Dict):
        # note: only values of existing keys are replaced below, so the dict size is unchanged
        for obs_key, obs_space in observation_space.spaces.items():
            processors: list[Callable[[torch.Tensor], Any]] = []
            # assume normalized here: with normalized_image=False the check would also require [0, 255]
            # bounds and uint8 dtype. Image-shaped terms with other bounds/dtypes are handled below.
            if is_image_space(obs_space, check_channels=True, normalized_image=True):
                actually_normalized = np.all(obs_space.low == -1.0) and np.all(obs_space.high == 1.0)
                if not actually_normalized:
                    if np.any(obs_space.low != 0) or np.any(obs_space.high != 255):
                        raise ValueError(
                            "Your image observation is not normalized in environment, and will not be "
                            "normalized by sb3 if its min is not 0 and max is not 255."
                        )
                    # sb3 will handle normalization and transpose, but sb3 expects uint8 images
                    if obs_space.dtype != np.uint8:
                        processors.append(lambda obs: obs.to(torch.uint8))
                        observation_space.spaces[obs_key] = gym.spaces.Box(0, 255, obs_space.shape, np.uint8)
                else:
                    # sb3 will NOT handle the normalization. sb3 would transpose, but its transpose applies
                    # to all image terms, which may be non-ideal; doing it in torch (on GPU) is also faster
                    # than letting sb3 transpose in numpy on CPU.
                    if not is_image_space_channels_first(obs_space):

                        def to_channels_first(img: torch.Tensor) -> torch.Tensor:
                            # 3-D input: (h, w, c) -> (c, h, w); otherwise batched: (n, h, w, c) -> (n, c, h, w)
                            return img.permute(2, 0, 1) if len(img.shape) == 3 else img.permute(0, 3, 1, 2)

                        processors.append(to_channels_first)
                        h, w, c = obs_space.shape
                        observation_space.spaces[obs_key] = gym.spaces.Box(-1.0, 1.0, (c, h, w), obs_space.dtype)

                # bind the current list as a default argument so each term keeps its own processors
                def chained_processor(obs: torch.Tensor, procs=processors) -> Any:
                    for proc in procs:
                        obs = proc(obs)
                    return obs

                # add processor to the dictionary
                if processors:
                    self.observation_processors[obs_key] = chained_processor

    # obtain gym spaces
    # note: stable-baselines3 does not like when we have unbounded action space so
    #   we set it to some high value here. Maybe this is not general but something to think about.
    action_space = self.unwrapped.single_action_space
    if isinstance(action_space, gym.spaces.Box) and not action_space.is_bounded("both"):
        action_space = gym.spaces.Box(low=-100, high=100, shape=action_space.shape)

    # initialize vec-env
    VecEnv.__init__(self, self.num_envs, observation_space, action_space)
def _process_obs(self, obs_dict: torch.Tensor | dict[str, torch.Tensor]) -> np.ndarray | dict[str, np.ndarray]:
"""Convert observations into NumPy data type."""
# Sb3 doesn't support asymmetric observation spaces, so we only use "policy"
......@@ -310,7 +354,9 @@ class Sb3VecEnvWrapper(VecEnv):
# note: ManagerBasedRLEnv uses torch backend (by default).
if isinstance(obs, dict):
for key, value in obs.items():
obs[key] = value.detach().cpu().numpy()
if key in self.observation_processors:
obs[key] = self.observation_processors[key](value)
obs[key] = obs[key].detach().cpu().numpy()
elif isinstance(obs, torch.Tensor):
obs = obs.detach().cpu().numpy()
else:
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment