Unverified Commit a8d2bcad authored by Pascal Roth's avatar Pascal Roth Committed by GitHub

Adds option to change the clipping behavior for all Cameras and unifies the default (#891)

# Description

This PR adds an option to define the clipping behavior for depth images
generated by `RayCasterCamera` and `Camera`. In addition, it unifies the
clipping behavior for the depth images of all camera implementations.
Per default, all values exceeding the range are clipped to zero for both
`distance_to_image_plane` and `distance_to_camera` depth images. Prev.
`RayCasterCamera` clipped the values to the maximum value of the depth
image, `Camera` did not clip them and had a different behavior for both
types, and `TiledCamera` clipped the values to zero.

Needs PR #617 to be merged

## Type of change

- Bug fix (non-breaking change which fixes an issue)
- New feature (non-breaking change which adds functionality)

## Checklist

- [x] I have run the [`pre-commit` checks](https://pre-commit.com/) with
`./isaaclab.sh --format`
- [x] I have made corresponding changes to the documentation
- [x] My changes generate no new warnings
- [x] I have added tests that prove my fix is effective or that my
feature works
- [x] I have updated the changelog and the corresponding version in the
extension's `config/extension.toml` file
- [x] I have added my name to the `CONTRIBUTORS.md` or my name already
exists there

---------
Signed-off-by: 's avatarPascal Roth <57946385+pascal-roth@users.noreply.github.com>
Signed-off-by: 's avatarKelly Guo <kellyg@nvidia.com>
Co-authored-by: 's avatarMayank Mittal <12863862+Mayankm96@users.noreply.github.com>
Co-authored-by: 's avatarDavid Hoeller <dhoeller@nvidia.com>
Co-authored-by: 's avatarKelly Guo <kellyg@nvidia.com>
Co-authored-by: 's avatarKelly Guo <kellyguo123@hotmail.com>
parent 97c73622
[package]
# Note: Semantic Versioning is used: https://semver.org/
version = "0.27.19"
version = "0.27.20"
# Description
title = "Isaac Lab framework for Robot Learning"
......
Changelog
---------
0.27.20 (2024-12-06)
~~~~~~~~~~~~~~~~~~~~
Added
^^^^^
* Added option to define the clipping behavior for depth images generated by
:class:`~omni.isaac.lab.sensors.RayCasterCamera`, :class:`~omni.isaac.lab.sensors.Camera`, and :class:`~omni.isaac.lab.sensors.TiledCamera`
Changed
^^^^^^^
* Unified the clipping behavior for the depth images of all camera implementations. Per default, all values exceeding
the range are clipped to zero for both ``distance_to_image_plane`` and ``distance_to_camera`` depth images. Prev.
:class:`~omni.isaac.lab.sensors.RayCasterCamera` clipped the values to the maximum value of the depth image,
:class:`~omni.isaac.lab.sensors.Camera` did not clip them and had a different behavior for both types.
0.27.19 (2024-12-05)
~~~~~~~~~~~~~~~~~~~~
......
......@@ -513,6 +513,19 @@ class Camera(SensorBase):
self._data.output[name][index] = data
# add info to output
self._data.info[index][name] = info
# NOTE: The `distance_to_camera` annotator returns the distance to the camera optical center. However,
# the replicator depth clipping is applied w.r.t. to the image plane which may result in values
# larger than the clipping range in the output. We apply an additional clipping to ensure values
# are within the clipping range for all the annotators.
if name == "distance_to_camera":
self._data.output[name][self._data.output[name] > self.cfg.spawn.clipping_range[1]] = torch.inf
# apply defined clipping behavior
if (
name == "distance_to_camera" or name == "distance_to_image_plane"
) and self.cfg.depth_clipping_behavior != "none":
self._data.output[name][torch.isinf(self._data.output[name])] = (
0.0 if self.cfg.depth_clipping_behavior == "zero" else self.cfg.spawn.clipping_range[1]
)
"""
Private Helpers
......@@ -631,6 +644,19 @@ class Camera(SensorBase):
self._data.info[index][name] = info
# concatenate the data along the batch dimension
self._data.output[name] = torch.stack(data_all_cameras, dim=0)
# NOTE: `distance_to_camera` and `distance_to_image_plane` are not both clipped to the maximum defined
# in the clipping range. The clipping is applied only to `distance_to_image_plane` and then both
# outputs are only clipped where the values in `distance_to_image_plane` exceed the threshold. To
# have a unified behavior between all cameras, we clip both outputs to the maximum value defined.
if name == "distance_to_camera":
self._data.output[name][self._data.output[name] > self.cfg.spawn.clipping_range[1]] = torch.inf
# clip the data if needed
if (
name == "distance_to_camera" or name == "distance_to_image_plane"
) and self.cfg.depth_clipping_behavior != "none":
self._data.output[name][torch.isinf(self._data.output[name])] = (
0.0 if self.cfg.depth_clipping_behavior == "zero" else self.cfg.spawn.clipping_range[1]
)
def _process_annotator_output(self, name: str, output: Any) -> tuple[torch.tensor, dict | None]:
"""Process the annotator output.
......
......@@ -53,6 +53,14 @@ class CameraCfg(SensorBaseCfg):
asset is already present in the scene.
"""
depth_clipping_behavior: Literal["max", "zero", "none"] = "zero"
"""Clipping behavior for the camera for values exceed the maximum value. Defaults to "zero".
- ``"max"``: Values are clipped to the maximum value.
- ``"zero"``: Values are clipped to zero.
- ``"none``: No clipping is applied. Values will be returned as ``inf``.
"""
data_types: list[str] = ["rgb"]
"""List of sensor names/types to enable for the camera. Defaults to ["rgb"].
......
......@@ -279,6 +279,22 @@ class TiledCamera(Camera):
if data_type == "rgba" and "rgb" in self.cfg.data_types:
self._data.output["rgb"] = self._data.output["rgba"][..., :3]
# NOTE: The `distance_to_camera` annotator returns the distance to the camera optical center. However,
# the replicator depth clipping is applied w.r.t. to the image plane which may result in values
# larger than the clipping range in the output. We apply an additional clipping to ensure values
# are within the clipping range for all the annotators.
if data_type == "distance_to_camera":
self._data.output[data_type][
self._data.output[data_type] > self.cfg.spawn.clipping_range[1]
] = torch.inf
# apply defined clipping behavior
if (
data_type == "distance_to_camera" or data_type == "distance_to_image_plane" or data_type == "depth"
) and self.cfg.depth_clipping_behavior != "none":
self._data.output[data_type][torch.isinf(self._data.output[data_type])] = (
0.0 if self.cfg.depth_clipping_behavior == "zero" else self.cfg.spawn.clipping_range[1]
)
"""
Private Helpers
"""
......
......@@ -297,14 +297,23 @@ class RayCasterCamera(RayCaster):
)
)[:, :, 0]
# apply the maximum distance after the transformation
distance_to_image_plane = torch.clip(distance_to_image_plane, max=self.cfg.max_distance)
if self.cfg.depth_clipping_behavior == "max":
distance_to_image_plane = torch.clip(distance_to_image_plane, max=self.cfg.max_distance)
distance_to_image_plane[torch.isnan(distance_to_image_plane)] = self.cfg.max_distance
elif self.cfg.depth_clipping_behavior == "zero":
distance_to_image_plane[distance_to_image_plane > self.cfg.max_distance] = 0.0
distance_to_image_plane[torch.isnan(distance_to_image_plane)] = 0.0
self._data.output["distance_to_image_plane"][env_ids] = distance_to_image_plane.view(
-1, *self.image_shape, 1
)
if "distance_to_camera" in self.cfg.data_types:
self._data.output["distance_to_camera"][env_ids] = torch.clip(
ray_depth.view(-1, *self.image_shape, 1), max=self.cfg.max_distance
)
if self.cfg.depth_clipping_behavior == "max":
ray_depth = torch.clip(ray_depth, max=self.cfg.max_distance)
elif self.cfg.depth_clipping_behavior == "zero":
ray_depth[ray_depth > self.cfg.max_distance] = 0.0
self._data.output["distance_to_camera"][env_ids] = ray_depth.view(-1, *self.image_shape, 1)
if "normals" in self.cfg.data_types:
self._data.output["normals"][env_ids] = ray_normal.view(-1, *self.image_shape, 3)
......
......@@ -46,6 +46,15 @@ class RayCasterCameraCfg(RayCasterCfg):
data_types: list[str] = ["distance_to_image_plane"]
"""List of sensor names/types to enable for the camera. Defaults to ["distance_to_image_plane"]."""
depth_clipping_behavior: Literal["max", "zero", "none"] = "zero"
"""Clipping behavior for the camera for values exceed the maximum value. Defaults to "zero".
- ``"max"``: Values are clipped to the maximum value.
- ``"zero"``: Values are clipped to zero.
- ``"none``: No clipping is applied. Values will be returned as ``inf`` for ``distance_to_camera`` and ``nan``
for ``distance_to_image_plane`` data type.
"""
pattern_cfg: PinholeCameraPatternCfg = MISSING
"""The pattern that defines the local ray starting positions and directions in a pinhole camera pattern."""
......
......@@ -393,6 +393,137 @@ class TestCamera(unittest.TestCase):
torch.testing.assert_close(rs_intrinsic_matrix[0, 0, 0], camera.data.intrinsic_matrices[0, 0, 0])
# torch.testing.assert_close(rs_intrinsic_matrix[0, 1, 1], camera.data.intrinsic_matrices[0, 1, 1])
def test_depth_clipping(self):
"""Test depth clipping.
.. note::
This test is the same for all camera models to enforce the same clipping behavior.
"""
# get camera cfgs
camera_cfg_zero = CameraCfg(
prim_path="/World/CameraZero",
offset=CameraCfg.OffsetCfg(pos=(2.5, 2.5, 6.0), rot=(-0.125, 0.362, 0.873, -0.302), convention="ros"),
spawn=sim_utils.PinholeCameraCfg().from_intrinsic_matrix(
focal_length=38.0,
intrinsic_matrix=[380.08, 0.0, 467.79, 0.0, 380.08, 262.05, 0.0, 0.0, 1.0],
height=540,
width=960,
clipping_range=(0.1, 10),
),
height=540,
width=960,
data_types=["distance_to_image_plane", "distance_to_camera"],
depth_clipping_behavior="zero",
)
camera_zero = Camera(camera_cfg_zero)
camera_cfg_none = copy.deepcopy(camera_cfg_zero)
camera_cfg_none.prim_path = "/World/CameraNone"
camera_cfg_none.depth_clipping_behavior = "none"
camera_none = Camera(camera_cfg_none)
camera_cfg_max = copy.deepcopy(camera_cfg_zero)
camera_cfg_max.prim_path = "/World/CameraMax"
camera_cfg_max.depth_clipping_behavior = "max"
camera_max = Camera(camera_cfg_max)
# Play sim
self.sim.reset()
# note: This is a workaround to ensure that the textures are loaded.
# Check "Known Issues" section in the documentation for more details.
for _ in range(5):
self.sim.step()
camera_zero.update(self.dt)
camera_none.update(self.dt)
camera_max.update(self.dt)
# none clipping should contain inf values
self.assertTrue(torch.isinf(camera_none.data.output["distance_to_camera"]).any())
self.assertTrue(torch.isinf(camera_none.data.output["distance_to_image_plane"]).any())
self.assertTrue(
camera_none.data.output["distance_to_camera"][
~torch.isinf(camera_none.data.output["distance_to_camera"])
].min()
>= camera_cfg_zero.spawn.clipping_range[0]
)
self.assertTrue(
camera_none.data.output["distance_to_camera"][
~torch.isinf(camera_none.data.output["distance_to_camera"])
].max()
<= camera_cfg_zero.spawn.clipping_range[1]
)
self.assertTrue(
camera_none.data.output["distance_to_image_plane"][
~torch.isinf(camera_none.data.output["distance_to_image_plane"])
].min()
>= camera_cfg_zero.spawn.clipping_range[0]
)
self.assertTrue(
camera_none.data.output["distance_to_image_plane"][
~torch.isinf(camera_none.data.output["distance_to_camera"])
].max()
<= camera_cfg_zero.spawn.clipping_range[1]
)
# zero clipping should result in zero values
self.assertTrue(
torch.all(
camera_zero.data.output["distance_to_camera"][
torch.isinf(camera_none.data.output["distance_to_camera"])
]
== 0.0
)
)
self.assertTrue(
torch.all(
camera_zero.data.output["distance_to_image_plane"][
torch.isinf(camera_none.data.output["distance_to_image_plane"])
]
== 0.0
)
)
self.assertTrue(
camera_zero.data.output["distance_to_camera"][camera_zero.data.output["distance_to_camera"] != 0.0].min()
>= camera_cfg_zero.spawn.clipping_range[0]
)
self.assertTrue(camera_zero.data.output["distance_to_camera"].max() <= camera_cfg_zero.spawn.clipping_range[1])
self.assertTrue(
camera_zero.data.output["distance_to_image_plane"][
camera_zero.data.output["distance_to_image_plane"] != 0.0
].min()
>= camera_cfg_zero.spawn.clipping_range[0]
)
self.assertTrue(
camera_zero.data.output["distance_to_image_plane"].max() <= camera_cfg_zero.spawn.clipping_range[1]
)
# max clipping should result in max values
self.assertTrue(
torch.all(
camera_max.data.output["distance_to_camera"][torch.isinf(camera_none.data.output["distance_to_camera"])]
== camera_cfg_zero.spawn.clipping_range[1]
)
)
self.assertTrue(
torch.all(
camera_max.data.output["distance_to_image_plane"][
torch.isinf(camera_none.data.output["distance_to_image_plane"])
]
== camera_cfg_zero.spawn.clipping_range[1]
)
)
self.assertTrue(camera_max.data.output["distance_to_camera"].min() >= camera_cfg_zero.spawn.clipping_range[0])
self.assertTrue(camera_max.data.output["distance_to_camera"].max() <= camera_cfg_zero.spawn.clipping_range[1])
self.assertTrue(
camera_max.data.output["distance_to_image_plane"].min() >= camera_cfg_zero.spawn.clipping_range[0]
)
self.assertTrue(
camera_max.data.output["distance_to_image_plane"].max() <= camera_cfg_zero.spawn.clipping_range[1]
)
def test_camera_resolution_all_colorize(self):
"""Test camera resolution is correctly set for all types with colorization enabled."""
# Add all types
......
......@@ -152,6 +152,107 @@ class TestWarpCamera(unittest.TestCase):
im_data.shape == (1, self.camera_cfg.pattern_cfg.height, self.camera_cfg.pattern_cfg.width, 1)
)
def test_depth_clipping(self):
"""Test depth clipping.
.. note::
This test is the same for all camera models to enforce the same clipping behavior.
"""
prim_utils.create_prim("/World/CameraZero", "Xform")
prim_utils.create_prim("/World/CameraNone", "Xform")
prim_utils.create_prim("/World/CameraMax", "Xform")
# get camera cfgs
camera_cfg_zero = RayCasterCameraCfg(
prim_path="/World/CameraZero",
mesh_prim_paths=["/World/defaultGroundPlane"],
offset=RayCasterCameraCfg.OffsetCfg(
pos=(2.5, 2.5, 6.0), rot=(0.9914449, 0.0, 0.1305, 0.0), convention="world"
),
pattern_cfg=patterns.PinholeCameraPatternCfg().from_intrinsic_matrix(
focal_length=38.0,
intrinsic_matrix=[380.08, 0.0, 467.79, 0.0, 380.08, 262.05, 0.0, 0.0, 1.0],
height=540,
width=960,
),
max_distance=10.0,
data_types=["distance_to_image_plane", "distance_to_camera"],
depth_clipping_behavior="zero",
)
camera_zero = RayCasterCamera(camera_cfg_zero)
camera_cfg_none = copy.deepcopy(camera_cfg_zero)
camera_cfg_none.prim_path = "/World/CameraNone"
camera_cfg_none.depth_clipping_behavior = "none"
camera_none = RayCasterCamera(camera_cfg_none)
camera_cfg_max = copy.deepcopy(camera_cfg_zero)
camera_cfg_max.prim_path = "/World/CameraMax"
camera_cfg_max.depth_clipping_behavior = "max"
camera_max = RayCasterCamera(camera_cfg_max)
# Play sim
self.sim.reset()
camera_zero.update(self.dt)
camera_none.update(self.dt)
camera_max.update(self.dt)
# none clipping should contain inf values
self.assertTrue(torch.isinf(camera_none.data.output["distance_to_camera"]).any())
self.assertTrue(torch.isnan(camera_none.data.output["distance_to_image_plane"]).any())
self.assertTrue(
camera_none.data.output["distance_to_camera"][
~torch.isinf(camera_none.data.output["distance_to_camera"])
].max()
> camera_cfg_zero.max_distance
)
self.assertTrue(
camera_none.data.output["distance_to_image_plane"][
~torch.isnan(camera_none.data.output["distance_to_image_plane"])
].max()
> camera_cfg_zero.max_distance
)
# zero clipping should result in zero values
self.assertTrue(
torch.all(
camera_zero.data.output["distance_to_camera"][
torch.isinf(camera_none.data.output["distance_to_camera"])
]
== 0.0
)
)
self.assertTrue(
torch.all(
camera_zero.data.output["distance_to_image_plane"][
torch.isnan(camera_none.data.output["distance_to_image_plane"])
]
== 0.0
)
)
self.assertTrue(camera_zero.data.output["distance_to_camera"].max() <= camera_cfg_zero.max_distance)
self.assertTrue(camera_zero.data.output["distance_to_image_plane"].max() <= camera_cfg_zero.max_distance)
# max clipping should result in max values
self.assertTrue(
torch.all(
camera_max.data.output["distance_to_camera"][torch.isinf(camera_none.data.output["distance_to_camera"])]
== camera_cfg_zero.max_distance
)
)
self.assertTrue(
torch.all(
camera_max.data.output["distance_to_image_plane"][
torch.isnan(camera_none.data.output["distance_to_image_plane"])
]
== camera_cfg_zero.max_distance
)
)
self.assertTrue(camera_max.data.output["distance_to_camera"].max() <= camera_cfg_zero.max_distance)
self.assertTrue(camera_max.data.output["distance_to_image_plane"].max() <= camera_cfg_zero.max_distance)
def test_camera_init_offset(self):
"""Test camera initialization with offset using different conventions."""
# define the same offset in all conventions
......
......@@ -120,6 +120,117 @@ class TestTiledCamera(unittest.TestCase):
self.assertGreater(im_data.mean().item(), 0.0)
del camera
def test_depth_clipping_max(self):
"""Test depth max clipping."""
# get camera cfgs
camera_cfg = TiledCameraCfg(
prim_path="/World/Camera",
offset=TiledCameraCfg.OffsetCfg(pos=(2.5, 2.5, 6.0), rot=(-0.125, 0.362, 0.873, -0.302), convention="ros"),
spawn=sim_utils.PinholeCameraCfg().from_intrinsic_matrix(
focal_length=38.0,
intrinsic_matrix=[380.08, 0.0, 467.79, 0.0, 380.08, 262.05, 0.0, 0.0, 1.0],
height=540,
width=960,
clipping_range=(4.9, 5.0),
),
height=540,
width=960,
data_types=["depth"],
depth_clipping_behavior="max",
)
camera = TiledCamera(camera_cfg)
# Play sim
self.sim.reset()
# note: This is a workaround to ensure that the textures are loaded.
# Check "Known Issues" section in the documentation for more details.
for _ in range(5):
self.sim.step()
camera.update(self.dt)
self.assertTrue(len(camera.data.output["depth"][torch.isinf(camera.data.output["depth"])]) == 0)
self.assertTrue(camera.data.output["depth"].min() >= camera_cfg.spawn.clipping_range[0])
self.assertTrue(camera.data.output["depth"].max() <= camera_cfg.spawn.clipping_range[1])
del camera
def test_depth_clipping_none(self):
"""Test depth none clipping."""
# get camera cfgs
camera_cfg = TiledCameraCfg(
prim_path="/World/Camera",
offset=TiledCameraCfg.OffsetCfg(pos=(2.5, 2.5, 6.0), rot=(-0.125, 0.362, 0.873, -0.302), convention="ros"),
spawn=sim_utils.PinholeCameraCfg().from_intrinsic_matrix(
focal_length=38.0,
intrinsic_matrix=[380.08, 0.0, 467.79, 0.0, 380.08, 262.05, 0.0, 0.0, 1.0],
height=540,
width=960,
clipping_range=(4.9, 5.0),
),
height=540,
width=960,
data_types=["depth"],
depth_clipping_behavior="none",
)
camera = TiledCamera(camera_cfg)
# Play sim
self.sim.reset()
# note: This is a workaround to ensure that the textures are loaded.
# Check "Known Issues" section in the documentation for more details.
for _ in range(5):
self.sim.step()
camera.update(self.dt)
self.assertTrue(len(camera.data.output["depth"][torch.isinf(camera.data.output["depth"])]) > 0)
self.assertTrue(camera.data.output["depth"].min() >= camera_cfg.spawn.clipping_range[0])
self.assertTrue(
camera.data.output["depth"][~torch.isinf(camera.data.output["depth"])].max()
<= camera_cfg.spawn.clipping_range[1]
)
del camera
def test_depth_clipping_zero(self):
"""Test depth zero clipping."""
# get camera cfgs
camera_cfg = TiledCameraCfg(
prim_path="/World/Camera",
offset=TiledCameraCfg.OffsetCfg(pos=(2.5, 2.5, 6.0), rot=(-0.125, 0.362, 0.873, -0.302), convention="ros"),
spawn=sim_utils.PinholeCameraCfg().from_intrinsic_matrix(
focal_length=38.0,
intrinsic_matrix=[380.08, 0.0, 467.79, 0.0, 380.08, 262.05, 0.0, 0.0, 1.0],
height=540,
width=960,
clipping_range=(4.9, 5.0),
),
height=540,
width=960,
data_types=["depth"],
depth_clipping_behavior="zero",
)
camera = TiledCamera(camera_cfg)
# Play sim
self.sim.reset()
# note: This is a workaround to ensure that the textures are loaded.
# Check "Known Issues" section in the documentation for more details.
for _ in range(5):
self.sim.step()
camera.update(self.dt)
self.assertTrue(len(camera.data.output["depth"][torch.isinf(camera.data.output["depth"])]) == 0)
self.assertTrue(camera.data.output["depth"].min() == 0.0)
self.assertTrue(camera.data.output["depth"].max() <= camera_cfg.spawn.clipping_range[1])
del camera
def test_multi_camera_init(self):
"""Test multi-camera initialization."""
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment