Unverified Commit f774425b authored by Mayank Mittal's avatar Mayank Mittal Committed by GitHub

Adds action clipping to rsl-rl wrapper (#2019)

# Description

Currently, the actions from the policy are applied directly to the
environment and are often also fed back to the policy, using the last
action as an observation.

Doing this can lead to instability during training, since applying a
large action can start a self-reinforcing feedback loop.
More specifically, applying a very large action leads to large
last_action observations, which often result in a large error in the
critic, which in turn can lead to even larger actions being sampled in
the future.

This PR aims to fix this for the RSL-RL library by clipping the actions
to (large) hard limits before applying them to the environment. This
prevents the actions from growing continuously and greatly improves
training stability.

Fixes #984, #1732, #1999

## Type of change

- Bug fix (non-breaking change which fixes an issue)
- New feature (non-breaking change which adds functionality)

## Checklist

- [x] I have run the [`pre-commit` checks](https://pre-commit.com/) with
`./isaaclab.sh --format`
- [x] I have made corresponding changes to the documentation
- [x] My changes generate no new warnings
- [ ] I have added tests that prove my fix is effective or that my
feature works
- [x] I have updated the changelog and the corresponding version in the
extension's `config/extension.toml` file
- [x] I have added my name to the `CONTRIBUTORS.md` or my name already
exists there
parent e6f63e21
......@@ -106,7 +106,7 @@ def main():
env = gym.wrappers.RecordVideo(env, **video_kwargs)
# wrap around environment for rsl-rl
env = RslRlVecEnvWrapper(env)
env = RslRlVecEnvWrapper(env, clip_actions=agent_cfg.clip_actions)
print(f"[INFO]: Loading model checkpoint from: {resume_path}")
# load previously trained model
......
......@@ -124,7 +124,7 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen
env = gym.wrappers.RecordVideo(env, **video_kwargs)
# wrap around environment for rsl-rl
env = RslRlVecEnvWrapper(env)
env = RslRlVecEnvWrapper(env, clip_actions=agent_cfg.clip_actions)
# create runner from rsl-rl
runner = OnPolicyRunner(env, agent_cfg.to_dict(), log_dir=log_dir, device=agent_cfg.device)
......
[package]
# Note: Semantic Versioning is used: https://semver.org/
version = "0.1.0"
version = "0.1.1"
# Description
title = "Isaac Lab RL"
......
Changelog
---------
0.1.1 (2025-03-10)
~~~~~~~~~~~~~~~~~~
Added
^^^^^
* Added a parameter to clip the actions in the action space inside the RSL-RL wrapper.
This parameter is set to None by default, which is the same as not clipping the actions.
* Added attribute :attr:`isaaclab_rl.rsl_rl.RslRlOnPolicyRunnerCfg.clip_actions` to set
the clipping range for the actions in the RSL-RL on-policy runner.
0.1.0 (2024-12-27)
~~~~~~~~~~~~~~~~~~
......
......@@ -98,6 +98,9 @@ class RslRlOnPolicyRunnerCfg:
algorithm: RslRlPpoAlgorithmCfg = MISSING
"""The algorithm configuration."""
clip_actions: float | None = None
"""The clipping value for actions. If ``None``, then no clipping is done."""
##
# Checkpointing parameters
##
......
......@@ -30,7 +30,7 @@ class RslRlVecEnvWrapper(VecEnv):
https://github.com/leggedrobotics/rsl_rl/blob/master/rsl_rl/env/vec_env.py
"""
def __init__(self, env: ManagerBasedRLEnv | DirectRLEnv):
def __init__(self, env: ManagerBasedRLEnv | DirectRLEnv, clip_actions: float | None = None):
"""Initializes the wrapper.
Note:
......@@ -38,6 +38,7 @@ class RslRlVecEnvWrapper(VecEnv):
Args:
env: The environment to wrap around.
clip_actions: The clipping value for actions. If ``None``, then no clipping is done.
Raises:
ValueError: When the environment is not an instance of :class:`ManagerBasedRLEnv` or :class:`DirectRLEnv`.
......@@ -50,10 +51,17 @@ class RslRlVecEnvWrapper(VecEnv):
)
# initialize the wrapper
self.env = env
self.clip_actions = clip_actions
# store information required by wrapper
self.num_envs = self.unwrapped.num_envs
self.device = self.unwrapped.device
self.max_episode_length = self.unwrapped.max_episode_length
# modify the action space to the clip range
self._modify_action_space()
# obtain dimensions of the environment
if hasattr(self.unwrapped, "action_manager"):
self.num_actions = self.unwrapped.action_manager.total_action_dim
else:
......@@ -72,6 +80,7 @@ class RslRlVecEnvWrapper(VecEnv):
self.num_privileged_obs = gym.spaces.flatdim(self.unwrapped.single_observation_space["critic"])
else:
self.num_privileged_obs = 0
# reset at the start since the RSL-RL runner does not call reset
self.env.reset()
......@@ -160,6 +169,9 @@ class RslRlVecEnvWrapper(VecEnv):
return obs_dict["policy"], {"observations": obs_dict}
def step(self, actions: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, dict]:
# clip actions
if self.clip_actions is not None:
actions = torch.clamp(actions, -self.clip_actions, self.clip_actions)
# record step information
obs_dict, rew, terminated, truncated, extras = self.env.step(actions)
# compute dones for compatibility with RSL-RL
......@@ -177,3 +189,21 @@ class RslRlVecEnvWrapper(VecEnv):
def close(self):  # noqa: D102
    # Closing the wrapper just closes the wrapped environment; whatever the
    # wrapped environment's ``close`` returns is handed back unchanged.
    wrapped_env = self.env
    return wrapped_env.close()
"""
Helper functions
"""
def _modify_action_space(self):
    """Replace the wrapped environment's action spaces with a box bounded by the clip range.

    Does nothing when ``self.clip_actions`` is ``None``. Otherwise the unwrapped
    environment's ``single_action_space`` is set to a ``gym.spaces.Box`` spanning
    ``[-clip_actions, clip_actions]`` with one entry per action, and its
    ``action_space`` is set to that space batched over ``self.num_envs``.

    Note:
        This is only possible for the box action space. It needs to change in
        the future to support other action spaces.
    """
    if self.clip_actions is None:
        return

    # ``__init__`` calls this helper before it assigns ``self.num_actions``, so
    # reading that attribute unconditionally appears to raise ``AttributeError``
    # whenever clipping is enabled. Use the attribute when it is already set and
    # otherwise derive the action dimension from the environment itself.
    num_actions = getattr(self, "num_actions", None)
    if num_actions is None:
        base_env = self.env.unwrapped
        if hasattr(base_env, "action_manager"):
            num_actions = base_env.action_manager.total_action_dim
        else:
            # No action manager: fall back to the flattened size of the current
            # single-environment action space.
            num_actions = gym.spaces.flatdim(base_env.single_action_space)

    # modify the action space to the clip range
    clipped_space = gym.spaces.Box(low=-self.clip_actions, high=self.clip_actions, shape=(num_actions,))
    self.env.unwrapped.single_action_space = clipped_space
    self.env.unwrapped.action_space = gym.vector.utils.batch_space(clipped_space, self.num_envs)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment