Unverified Commit f4982455 authored by ooctipus's avatar ooctipus Committed by GitHub

Separates per-step termination and last-episode termination bookkeeping (#3745)

# Description

This PR fixes the issue where get_done_term returned last episode value
rather than current step value.

This PR realizes values used for get_term should be different from that
used for logging, and mixed useage leads to non-intuitive behavior.

using per-step value for logging leads to overcounting and undercounting
reported in #2977
using last-episode value for get_term leads to misalignment with
expectation reported in #3720

Fixes #2977 #3720 

---

The logging behavior remains *mostly* the same as #3107, and and also
got rid of the weird overwriting behavior(yay).
I get exactly the same termination curve as #3107 when run on
`Isaac-Velocity-Rough-Anymal-C-v0`

Here is a benchmark summary with 1000 steps running
`Isaac-Velocity-Rough-Anymal-C-v0 ` with 4096 envs

Before #3107: 

`| termination.compute     |       0.229 ms|`
`| termination.reset       |       0.007 ms|`

PR #3107: 

`| termination.compute     |       0.274 ms|`
`| termination.reset       |       0.004 ms|`

This PR: 

`| termination.compute     |       0.258 ms|`
`| termination.reset       |       0.004 ms|`

We actually see improvement, this is due to the fact that expensive
maintenance of last_episode_value is only computed once per
compute(#3107 computes last_episode_value for every term)

## Type of change

- Bug fix (non-breaking change which fixes an issue)

## Checklist

- [x] I have read and understood the [contribution
guidelines](https://isaac-sim.github.io/IsaacLab/main/source/refs/contributing.html)
- [x] I have run the [`pre-commit` checks](https://pre-commit.com/) with
`./isaaclab.sh --format`
- [ ] I have made corresponding changes to the documentation
- [x] My changes generate no new warnings
- [ ] I have added tests that prove my fix is effective or that my
feature works
- [x] I have updated the changelog and the corresponding version in the
extension's `config/extension.toml` file
- [x] I have added my name to the `CONTRIBUTORS.md` or my name already
exists there

---------
Signed-off-by: 's avatarKelly Guo <kellyg@nvidia.com>
Co-authored-by: 's avatarKelly Guo <kellyg@nvidia.com>
parent c8b6d227
[package]
# Note: Semantic Versioning is used: https://semver.org/
version = "0.47.8"
version = "0.47.9"
# Description
title = "Isaac Lab framework for Robot Learning"
......
Changelog
---------
0.47.9 (2025-11-05)
~~~~~~~~~~~~~~~~~~~
Changed
^^^^^^^
* Fixed termination term bookkeeping in :class:`~isaaclab.managers.TerminationManager`:
per-step termination and last-episode termination bookkeeping are now separated.
last-episode dones are now updated once per step from all term outputs, avoiding per-term overwrites
and ensuring Episode_Termination metrics reflect the actual triggering terms.
0.47.8 (2025-11-06)
~~~~~~~~~~~~~~~~~~~
......
......@@ -63,6 +63,8 @@ class TerminationManager(ManagerBase):
self._term_name_to_term_idx = {name: i for i, name in enumerate(self._term_names)}
# prepare extra info to store individual termination term information
self._term_dones = torch.zeros((self.num_envs, len(self._term_names)), device=self.device, dtype=torch.bool)
# prepare extra info to store last episode done per termination term information
self._last_episode_dones = torch.zeros_like(self._term_dones)
# create buffer for managing termination per environment
self._truncated_buf = torch.zeros(self.num_envs, device=self.device, dtype=torch.bool)
self._terminated_buf = torch.zeros_like(self._truncated_buf)
......@@ -138,7 +140,7 @@ class TerminationManager(ManagerBase):
env_ids = slice(None)
# add to episode dict
extras = {}
last_episode_done_stats = self._term_dones.float().mean(dim=0)
last_episode_done_stats = self._last_episode_dones.float().mean(dim=0)
for i, key in enumerate(self._term_names):
# store information
extras["Episode_Termination/" + key] = last_episode_done_stats[i].item()
......@@ -169,15 +171,17 @@ class TerminationManager(ManagerBase):
else:
self._terminated_buf |= value
# add to episode dones
rows = value.nonzero(as_tuple=True)[0] # indexing is cheaper than boolean advance indexing
if rows.numel() > 0:
self._term_dones[rows] = False
self._term_dones[rows, i] = True
self._term_dones[:, i] = value
# update last-episode dones once per compute: for any env where a term fired,
# reflect exactly which term(s) fired this step and clear others
rows = self._term_dones.any(dim=1).nonzero(as_tuple=True)[0]
if rows.numel() > 0:
self._last_episode_dones[rows] = self._term_dones[rows]
# return combined termination signal
return self._truncated_buf | self._terminated_buf
def get_term(self, name: str) -> torch.Tensor:
"""Returns the termination term with the specified name.
"""Returns the termination term value at current step with the specified name.
Args:
name: The name of the termination term.
......@@ -190,7 +194,8 @@ class TerminationManager(ManagerBase):
def get_active_iterable_terms(self, env_idx: int) -> Sequence[tuple[str, Sequence[float]]]:
"""Returns the active terms as iterable sequence of tuples.
The first element of the tuple is the name of the term and the second element is the raw value(s) of the term.
The first element of the tuple is the name of the term and the second element is the raw value(s) of the term
recorded at current step.
Args:
env_idx: The specific environment to pull the active terms from.
......
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
"""Launch Isaac Sim Simulator first."""
from isaaclab.app import AppLauncher
# launch omniverse app
simulation_app = AppLauncher(headless=True).app
"""Rest everything follows."""
import torch
import pytest
from isaaclab.managers import TerminationManager, TerminationTermCfg
from isaaclab.sim import SimulationContext
class DummyEnv:
"""Minimal mutable env stub for the termination manager tests."""
def __init__(self, num_envs: int, device: str, sim: SimulationContext):
self.num_envs = num_envs
self.device = device
self.sim = sim
self.counter = 0 # mutable step counter used by test terms
def fail_every_5_steps(env) -> torch.Tensor:
"""Returns True for all envs when counter is a positive multiple of 5."""
cond = env.counter > 0 and (env.counter % 5 == 0)
return torch.full((env.num_envs,), cond, dtype=torch.bool, device=env.device)
def fail_every_10_steps(env) -> torch.Tensor:
"""Returns True for all envs when counter is a positive multiple of 10."""
cond = env.counter > 0 and (env.counter % 10 == 0)
return torch.full((env.num_envs,), cond, dtype=torch.bool, device=env.device)
def fail_every_3_steps(env) -> torch.Tensor:
"""Returns True for all envs when counter is a positive multiple of 3."""
cond = env.counter > 0 and (env.counter % 3 == 0)
return torch.full((env.num_envs,), cond, dtype=torch.bool, device=env.device)
@pytest.fixture
def env():
sim = SimulationContext()
return DummyEnv(num_envs=20, device="cpu", sim=sim)
def test_initial_state_and_shapes(env):
cfg = {
"term_5": TerminationTermCfg(func=fail_every_5_steps),
"term_10": TerminationTermCfg(func=fail_every_10_steps),
}
tm = TerminationManager(cfg, env)
# Active term names
assert tm.active_terms == ["term_5", "term_10"]
# Internal buffers have expected shapes and start as all False
assert tm._term_dones.shape == (env.num_envs, 2)
assert tm._last_episode_dones.shape == (env.num_envs, 2)
assert tm.dones.shape == (env.num_envs,)
assert tm.time_outs.shape == (env.num_envs,)
assert tm.terminated.shape == (env.num_envs,)
assert torch.all(~tm._term_dones) and torch.all(~tm._last_episode_dones)
def test_term_transitions_and_persistence(env):
"""Concise transitions: single fire, persist, switch, both, persist.
Uses 3-step and 5-step terms and verifies current-step values and last-episode persistence.
"""
cfg = {
"term_3": TerminationTermCfg(func=fail_every_3_steps, time_out=False),
"term_5": TerminationTermCfg(func=fail_every_5_steps, time_out=False),
}
tm = TerminationManager(cfg, env)
# step 3: only term_3 -> last_episode [True, False]
env.counter = 3
out = tm.compute()
assert torch.all(tm.get_term("term_3")) and torch.all(~tm.get_term("term_5"))
assert torch.all(out)
assert torch.all(tm._last_episode_dones[:, 0]) and torch.all(~tm._last_episode_dones[:, 1])
# step 4: none -> last_episode persists [True, False]
env.counter = 4
out = tm.compute()
assert torch.all(~out)
assert torch.all(~tm.get_term("term_3")) and torch.all(~tm.get_term("term_5"))
assert torch.all(tm._last_episode_dones[:, 0]) and torch.all(~tm._last_episode_dones[:, 1])
# step 5: only term_5 -> last_episode [False, True]
env.counter = 5
out = tm.compute()
assert torch.all(~tm.get_term("term_3")) and torch.all(tm.get_term("term_5"))
assert torch.all(out)
assert torch.all(~tm._last_episode_dones[:, 0]) and torch.all(tm._last_episode_dones[:, 1])
# step 15: both -> last_episode [True, True]
env.counter = 15
out = tm.compute()
assert torch.all(tm.get_term("term_3")) and torch.all(tm.get_term("term_5"))
assert torch.all(out)
assert torch.all(tm._last_episode_dones[:, 0]) and torch.all(tm._last_episode_dones[:, 1])
# step 16: none -> persist [True, True]
env.counter = 16
out = tm.compute()
assert torch.all(~out)
assert torch.all(~tm.get_term("term_3")) and torch.all(~tm.get_term("term_5"))
assert torch.all(tm._last_episode_dones[:, 0]) and torch.all(tm._last_episode_dones[:, 1])
def test_time_out_vs_terminated_split(env):
cfg = {
"term_5": TerminationTermCfg(func=fail_every_5_steps, time_out=False), # terminated
"term_10": TerminationTermCfg(func=fail_every_10_steps, time_out=True), # timeout
}
tm = TerminationManager(cfg, env)
# Step 5: terminated fires, not timeout
env.counter = 5
out = tm.compute()
assert torch.all(out)
assert torch.all(tm.terminated) and torch.all(~tm.time_outs)
# Step 10: both fire; timeout and terminated both True
env.counter = 10
out = tm.compute()
assert torch.all(out)
assert torch.all(tm.terminated) and torch.all(tm.time_outs)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment