Adds raycasting-based camera implementation (#159)

This MR adds the `RayCasterCamera` as a ray-cast-based camera for "distance_to_camera", "distance_to_image_plane" and "normals" annotations. It has the same interface and functionalities as the USD Camera wrapper, while it is, on average, 30% faster. Fixes #131 - Bug fix (non-breaking change which fixes an issue) - New feature (non-breaking change which adds functionality) - This change requires a documentation update - [x] I have run the [`pre-commit` checks](https://pre-commit.com/) with `./orbit.sh --format` - [ ] I have made corresponding changes to the documentation - [x] My changes generate no new warnings - [x] I have added tests that prove my fix is effective or that my feature works - [x] I have updated the changelog and the corresponding version in the extension's `config/extension.toml` file --------- Signed-off-by: Pascal Roth <57946385+pascal-roth@users.noreply.github.com> Co-authored-by: Mayank Mittal <mittalma@leggedrobotics.com>

Adds raycasting-based camera implementation (#159)
This MR adds the `RayCasterCamera` as a ray-cast-based camera for "distance_to_camera", "distance_to_image_plane" and "normals" annotations. It has the same interface and functionalities as the USD Camera wrapper, while it is, on average, 30% faster. Fixes #131 - Bug fix (non-breaking change which fixes an issue) - New feature (non-breaking change which adds functionality) - This change requires a documentation update - [x] I have run the [`pre-commit` checks](https://pre-commit.com/) with `./orbit.sh --format` - [ ] I have made corresponding changes to the documentation - [x] My changes generate no new warnings - [x] I have added tests that prove my fix is effective or that my feature works - [x] I have updated the changelog and the corresponding version in the extension's `config/extension.toml` file --------- Signed-off-by: Pascal Roth <57946385+pascal-roth@users.noreply.github.com> Co-authored-by: Mayank Mittal <mittalma@leggedrobotics.com>
5c3fa70f · Pascal Roth · Mayank Mittal · e7d43de3 · 5c3fa70f · 5c3fa70f
Commit 5c3fa70f authored Nov 01, 2023 by Pascal Roth Committed by Mayank Mittal Nov 01, 2023
20 changed files
--- a/source/extensions/omni.isaac.orbit/config/extension.toml
+++ b/source/extensions/omni.isaac.orbit/config/extension.toml
 [package]
 # Note: Semantic Versioning is used: https://semver.org/
-version = "0.9.30"
+version = "0.9.31"
 # Description
 title = "ORBIT framework for Robot Learning"

--- a/source/extensions/omni.isaac.orbit/docs/CHANGELOG.rst
+++ b/source/extensions/omni.isaac.orbit/docs/CHANGELOG.rst
 Changelog
 ---------
+0.9.31 (2023-11-02)
+~~~~~~~~~~~~~~~~~~~
+Added
+^^^^^
+* Added the :class:`omni.isaac.orbit.sensors.RayCasterCamera` class, as a ray-casting based camera for
+  "distance_to_camera", "distance_to_image_plane" and "normals" annotations. It has the same interface and
+  functionalities as the USD Camera while it is on average 30% faster.
 0.9.30 (2023-11-01)
 ~~~~~~~~~~~~~~~~~~~
@@ -155,7 +166,6 @@ Added
 0.9.18 (2023-10-23)
-~~~~~~~~~~~~~~~~~~~
 Added
 ^^^^^

--- a/source/extensions/omni.isaac.orbit/omni/isaac/orbit/markers/config/__init__.py
+++ b/source/extensions/omni.isaac.orbit/omni/isaac/orbit/markers/config/__init__.py
@@ -19,7 +19,7 @@ RAY_CASTER_MARKER_CFG = VisualizationMarkersCfg(
            radius=0.02,
            visual_material=sim_utils.PreviewSurfaceCfg(diffuse_color=(1.0, 0.0, 0.0)),
        ),
-    }
+    },
 )
 """Configuration for the ray-caster marker."""

--- a/source/extensions/omni.isaac.orbit/omni/isaac/orbit/sensors/camera/camera.py
+++ b/source/extensions/omni.isaac.orbit/omni/isaac/orbit/sensors/camera/camera.py
--- a/source/extensions/omni.isaac.orbit/omni/isaac/orbit/sensors/camera/camera_data.py
+++ b/source/extensions/omni.isaac.orbit/omni/isaac/orbit/sensors/camera/camera_data.py
@@ -10,6 +10,8 @@ from dataclasses import dataclass
 from tensordict import TensorDict
 from typing import Any
+from .utils import convert_orientation_convention
 @dataclass
 class CameraData:
@@ -25,15 +27,6 @@ class CameraData:
    Shape is (N, 3) where ``N`` is the number of sensors.
    """
-    quat_w_ros: torch.Tensor = None
-    """Quaternion orientation `(w, x, y, z)` of the sensor origin in world frame, following ROS convention.
-    .. note::
-        ROS convention follows the camera aligned with forward axis +Z and up axis -Y.
-    Shape is (N, 4) where ``N`` is the number of sensors.
-    """
    quat_w_world: torch.Tensor = None
    """Quaternion orientation `(w, x, y, z)` of the sensor origin in world frame, following the world coordinate frame
@@ -43,15 +36,6 @@ class CameraData:
    Shape is ``(N, 4)`` where ``N`` is the number of sensors.
    """
-    quat_w_opengl: torch.Tensor = None
-    """Quaternion orientation `(w, x, y, z)` of the sensor origin in world frame, following Opengl / Usd.Camera convention
-    .. note::
-        OpenGL convention follows the camera aligned with forward axis -Z and up axis +Y.
-    Shape is ``(N, 4)`` where ``N`` is the number of sensors.
-    """
    ##
    # Camera data
    ##
@@ -81,3 +65,30 @@ class CameraData:
    For semantic-based data, this corresponds to the ``"info"`` key in the output of the sensor. For other sensor
    types, the info is empty.
    """
+    ##
+    # Additional Frame orientation conventions
+    ##
+    @property
+    def quat_w_ros(self) -> torch.Tensor:
+        """Quaternion orientation `(w, x, y, z)` of the sensor origin in the world frame, following ROS convention.
+        .. note::
+            ROS convention follows the camera aligned with forward axis +Z and up axis -Y.
+        Shape is (N, 4) where ``N`` is the number of sensors.
+        """
+        return convert_orientation_convention(self.quat_w_world, origin="world", target="ros")
+    @property
+    def quat_w_opengl(self) -> torch.Tensor:
+        """Quaternion orientation `(w, x, y, z)` of the sensor origin in the world frame, following
+        Opengl / USD Camera convention.
+        .. note::
+            OpenGL convention follows the camera aligned with forward axis -Z and up axis +Y.
+        Shape is ``(N, 4)`` where ``N`` is the number of sensors.
+        """
+        return convert_orientation_convention(self.quat_w_world, origin="world", target="opengl")
--- a/source/extensions/omni.isaac.orbit/omni/isaac/orbit/sensors/camera/utils.py
+++ b/source/extensions/omni.isaac.orbit/omni/isaac/orbit/sensors/camera/utils.py
@@ -9,10 +9,13 @@ from __future__ import annotations
 import math
 import numpy as np
 import torch
+import torch.nn.functional as F
 from typing import Sequence
 from typing_extensions import Literal
+import omni.isaac.core.utils.stage as stage_utils
 import warp as wp
+from pxr import UsdGeom
 import omni.isaac.orbit.utils.math as math_utils
 from omni.isaac.orbit.utils.array import TensorData, convert_to_torch
@@ -22,6 +25,7 @@ __all__ = [
    "create_pointcloud_from_depth",
    "create_pointcloud_from_rgbd",
    "convert_orientation_convention",
+    "create_rotation_matrix_from_view",
 ]
@@ -292,9 +296,9 @@ def convert_orientation_convention(
    Possible conventions are:
-    - :obj:"opengl" - forward axis: -Z - up axis +Y - Offset is applied in the OpenGL (Usd.Camera) convention
+    - :obj:`"opengl"` - forward axis: -Z - up axis +Y - Offset is applied in the OpenGL (Usd.Camera) convention
-    - :obj:"ros"    - forward axis: +Z - up axis -Y - Offset is applied in the ROS convention
+    - :obj:`"ros"`    - forward axis: +Z - up axis -Y - Offset is applied in the ROS convention
-    - :obj:"world"  - forward axis: +X - up axis +Z - Offset is applied in the World Frame convention
+    - :obj:`"world"`  - forward axis: +X - up axis +Z - Offset is applied in the World Frame convention
    Args:
        orientation: Quaternion of form `(w, x, y, z)` with shape (..., 4) in source convention
@@ -348,3 +352,53 @@ def convert_orientation_convention(
        return math_utils.quat_from_matrix(rotm)
    else:
        return quat_gl.clone()
+# @torch.jit.script
+def create_rotation_matrix_from_view(
+    eyes: torch.Tensor,
+    targets: torch.Tensor,
+    device: str = "cpu",
+) -> torch.Tensor:
+    """
+    This function takes a vector ''eyes'' which specifies the location
+    of the camera in world coordinates and the vector ''targets'' which
+    indicate the position of the object.
+    The output is a rotation matrix representing the transformation
+    from world coordinates -> view coordinates.
+        The inputs camera_position and targets can each be a
+        - 3 element tuple/list
+        - torch tensor of shape (1, 3)
+        - torch tensor of shape (N, 3)
+    Args:
+        eyes: position of the camera in world coordinates
+        targets: position of the object in world coordinates
+    The vectors are broadcast against each other so they all have shape (N, 3).
+    Returns:
+        R: (N, 3, 3) batched rotation matrices
+    Reference:
+    Based on PyTorch3D (https://github.com/facebookresearch/pytorch3d/blob/eaf0709d6af0025fe94d1ee7cec454bc3054826a/pytorch3d/renderer/cameras.py#L1635-L1685)
+    """
+    up_axis_token = stage_utils.get_stage_up_axis()
+    if up_axis_token == UsdGeom.Tokens.y:
+        up_axis = torch.tensor((0, 1, 0), device=device, dtype=torch.float32).repeat(eyes.shape[0], 1)
+    elif up_axis_token == UsdGeom.Tokens.z:
+        up_axis = torch.tensor((0, 0, 1), device=device, dtype=torch.float32).repeat(eyes.shape[0], 1)
+    else:
+        raise ValueError(f"Invalid up axis: {up_axis_token}")
+    # get rotation matrix in opengl format (-Z forward, +Y up)
+    z_axis = -F.normalize(targets - eyes, eps=1e-5)
+    x_axis = F.normalize(torch.cross(up_axis, z_axis, dim=1), eps=1e-5)
+    y_axis = F.normalize(torch.cross(z_axis, x_axis, dim=1), eps=1e-5)
+    is_close = torch.isclose(x_axis, torch.tensor(0.0), atol=5e-3).all(dim=1, keepdim=True)
+    if is_close.any():
+        replacement = F.normalize(torch.cross(y_axis, z_axis, dim=1), eps=1e-5)
+        x_axis = torch.where(is_close, replacement, x_axis)
+    R = torch.cat((x_axis[:, None, :], y_axis[:, None, :], z_axis[:, None, :]), dim=1)
+    return R.transpose(1, 2)
--- a/source/extensions/omni.isaac.orbit/omni/isaac/orbit/sensors/ray_caster/__init__.py
+++ b/source/extensions/omni.isaac.orbit/omni/isaac/orbit/sensors/ray_caster/__init__.py
@@ -11,14 +11,19 @@ from __future__ import annotations
 from . import patterns
 from .ray_caster import RayCaster
+from .ray_caster_camera import RayCasterCamera
+from .ray_caster_camera_cfg import RayCasterCameraCfg
 from .ray_caster_cfg import RayCasterCfg
 from .ray_caster_data import RayCasterData
 __all__ = [
-    # sensor
+    # ray caster
    "RayCaster",
    "RayCasterData",
    "RayCasterCfg",
+    # camera
+    "RayCasterCameraCfg",
+    "RayCasterCamera",
    # patterns
    "patterns",
 ]
--- a/source/extensions/omni.isaac.orbit/omni/isaac/orbit/sensors/ray_caster/patterns/patterns.py
+++ b/source/extensions/omni.isaac.orbit/omni/isaac/orbit/sensors/ray_caster/patterns/patterns.py
@@ -6,7 +6,6 @@
 from __future__ import annotations
-import numpy as np
 import torch
 from typing import TYPE_CHECKING
@@ -41,26 +40,46 @@ def grid_pattern(cfg: patterns_cfg.GridPatternCfg, device: str) -> tuple[torch.T
    return ray_starts, ray_directions
-def pinhole_camera_pattern(cfg: patterns_cfg.PinholeCameraPatternCfg, device: str) -> tuple[torch.Tensor, torch.Tensor]:
+def pinhole_camera_pattern(
-    """The depth-image pattern for ray casting.
+    cfg: patterns_cfg.PinholeCameraPatternCfg, intrinsic_matrices: torch.Tensor, device: str
+) -> tuple[torch.Tensor, torch.Tensor]:
+    """The image pattern for ray casting.
+    .. caution::
+        This function does not follow the standard pattern interface. It requires the intrinsic matrices
+        of the cameras to be passed in. This is because we want to be able to randomize the intrinsic
+        matrices of the cameras, which is not possible with the standard pattern interface.
    Args:
        cfg: The configuration instance for the pattern.
+        intrinsic_matrices: The intrinsic matrices of the cameras. Shape is (N, 3, 3).
        device: The device to create the pattern on.
    Returns:
-        The starting positions and directions of the rays.
+        The starting positions and directions of the rays. The shape of the tensors are
+        (N, H * W, 3) and (N, H * W, 3) respectively.
    """
-    x_grid = torch.full((cfg.height, cfg.width), cfg.far_plane, device=device)
+    # get image plane mesh grid
-    y_range = np.tan(np.deg2rad(cfg.horizontal_fov) / 2.0) * cfg.far_plane
+    grid = torch.meshgrid(
-    y = torch.linspace(y_range, -y_range, cfg.width, device=device)
+        torch.arange(start=0, end=cfg.width, dtype=torch.int32, device=device),
-    z_range = y_range * cfg.height / cfg.width
+        torch.arange(start=0, end=cfg.height, dtype=torch.int32, device=device),
-    z = torch.linspace(z_range, -z_range, cfg.height, device=device)
+        indexing="xy",
-    y_grid, z_grid = torch.meshgrid(y, z, indexing="xy")
+    )
+    pixels = torch.vstack(list(map(torch.ravel, grid))).T
-    ray_directions = torch.cat([x_grid.unsqueeze(2), y_grid.unsqueeze(2), z_grid.unsqueeze(2)], dim=2)
+    # convert to homogeneous coordinate system
-    ray_directions = torch.nn.functional.normalize(ray_directions, p=2.0, dim=-1).view(-1, 3)
+    pixels = torch.hstack([pixels, torch.ones((len(pixels), 1), device=device)])
-    ray_starts = torch.zeros_like(ray_directions)
+    # get pixel coordinates in camera frame
+    pix_in_cam_frame = torch.matmul(torch.inverse(intrinsic_matrices), pixels.T)
+    # robotics camera frame is (x forward, y left, z up) from camera frame with (x right, y down, z forward)
+    # transform to robotics camera frame
+    transform_vec = torch.tensor([1, -1, -1], device=device).unsqueeze(0).unsqueeze(2)
+    pix_in_cam_frame = pix_in_cam_frame[:, [2, 0, 1], :] * transform_vec
+    # normalize ray directions
+    ray_directions = (pix_in_cam_frame / torch.norm(pix_in_cam_frame, dim=1, keepdim=True)).permute(0, 2, 1)
+    # for camera, we always ray-cast from the sensor's origin
+    ray_starts = torch.zeros_like(ray_directions, device=device)
    return ray_starts, ray_directions

--- a/source/extensions/omni.isaac.orbit/omni/isaac/orbit/sensors/ray_caster/patterns/patterns_cfg.py
+++ b/source/extensions/omni.isaac.orbit/omni/isaac/orbit/sensors/ray_caster/patterns/patterns_cfg.py
@@ -52,14 +52,27 @@ class PinholeCameraPatternCfg(PatternBaseCfg):
    func = patterns.pinhole_camera_pattern
-    horizontal_fov: float = MISSING
+    focal_length: float = 24.0
-    """Horizontal field of view (in degrees)."""
+    """Perspective focal length (in cm). Defaults to 24.0cm.
+    Longer lens lengths narrower FOV, shorter lens lengths wider FOV.
+    """
+    horizontal_aperture: float = 20.955
+    """Horizontal aperture (in mm). Defaults to 20.955mm.
+    Emulates sensor/film width on a camera.
+    Note:
+        The default value is the horizontal aperture of a 35 mm spherical projector.
+    """
+    horizontal_aperture_offset: float = 0.0
+    """Offsets Resolution/Film gate horizontally. Defaults to 0.0."""
+    vertical_aperture_offset: float = 0.0
+    """Offsets Resolution/Film gate vertically. Defaults to 0.0."""
    width: int = MISSING
    """Width of the image (in pixels)."""
    height: int = MISSING
    """Height of the image (in pixels)."""
-    far_plane: float = MISSING
-    """Far plane of the image (in meters)."""
 @configclass

--- a/source/extensions/omni.isaac.orbit/omni/isaac/orbit/sensors/ray_caster/ray_caster.py
+++ b/source/extensions/omni.isaac.orbit/omni/isaac/orbit/sensors/ray_caster/ray_caster.py
@@ -8,10 +8,11 @@ from __future__ import annotations
 import numpy as np
 import torch
-from typing import TYPE_CHECKING, Sequence
+from typing import TYPE_CHECKING, ClassVar, Sequence
 import carb
 import omni.isaac.core.utils.prims as prim_utils
+import warp as wp
 from omni.isaac.core.articulations import ArticulationView
 from omni.isaac.core.prims import RigidPrimView, XFormPrimView
 from pxr import UsdGeom, UsdPhysics
@@ -47,6 +48,14 @@ class RayCaster(SensorBase):
    cfg: RayCasterCfg
    """The configuration parameters."""
+    meshes: ClassVar[dict[str, wp.Mesh]] = {}
+    """The warp meshes available for raycasting.
+    The keys correspond to the prim path for the meshes, and values are the corresponding warp Mesh objects.
+    Note:
+           We store a global dictionary of all warp meshes to prevent re-loading the mesh for different ray-cast sensor instances.
+    """
    def __init__(self, cfg: RayCasterCfg):
        """Initializes the ray-caster object.
@@ -58,8 +67,6 @@ class RayCaster(SensorBase):
        super().__init__(cfg)
        # Create empty variables for storing output data
        self._data = RayCasterData()
-        # List of meshes to ray-cast
-        self.warp_meshes = []
    def __str__(self) -> str:
        """Returns: A string containing information about the instance."""
@@ -67,7 +74,7 @@ class RayCaster(SensorBase):
            f"Ray-caster @ '{self.cfg.prim_path}': \n"
            f"\tview type            : {self._view.__class__}\n"
            f"\tupdate period (s)    : {self.cfg.update_period}\n"
-            f"\tnumber of meshes     : {len(self.warp_meshes)}\n"
+            f"\tnumber of meshes     : {len(RayCaster.meshes)}\n"
            f"\tnumber of sensors    : {self._view.count}\n"
            f"\tnumber of rays/sensor: {self.num_rays}\n"
            f"\ttotal number of rays : {self.num_rays * self._view.count}"
@@ -127,13 +134,24 @@ class RayCaster(SensorBase):
        self._view = prim_view_class(self.cfg.prim_path, reset_xform_properties=False)
        self._view.initialize()
+        # load the meshes by parsing the stage
+        self._initialize_warp_meshes()
+        # initialize the ray start and directions
+        self._initialize_rays_impl()
+    def _initialize_warp_meshes(self):
        # check number of mesh prims provided
        if len(self.cfg.mesh_prim_paths) != 1:
            raise NotImplementedError(
                f"RayCaster currently only supports one mesh prim. Received: {len(self.cfg.mesh_prim_paths)}"
            )
        # read prims to ray-cast
        for mesh_prim_path in self.cfg.mesh_prim_paths:
+            # check if mesh already casted into warp mesh
+            if mesh_prim_path in RayCaster.meshes:
+                continue
            # check if the prim is a plane - handle PhysX plane as a special case
            # if a plane exists then we need to create an infinite mesh that is a plane
            mesh_prim = prim_utils.get_first_matching_child_prim(
@@ -164,13 +182,15 @@ class RayCaster(SensorBase):
                # print info
                carb.log_info(f"Created infinite plane mesh prim: {mesh_prim.GetPath()}.")
            # add the warp mesh to the list
-            self.warp_meshes.append(wp_mesh)
+            RayCaster.meshes[mesh_prim_path] = wp_mesh
        # throw an error if no meshes are found
-        if len(self.warp_meshes) == 0:
+        if all([mesh_prim_path not in RayCaster.meshes for mesh_prim_path in self.cfg.mesh_prim_paths]):
            raise RuntimeError(
                f"No meshes found for ray-casting! Please check the mesh prim paths: {self.cfg.mesh_prim_paths}"
            )
+    def _initialize_rays_impl(self):
        # compute ray stars and directions
        self.ray_starts, self.ray_directions = self.cfg.pattern_cfg.func(self.cfg.pattern_cfg, self._device)
        self.num_rays = len(self.ray_directions)
@@ -210,7 +230,11 @@ class RayCaster(SensorBase):
            ray_directions_w = quat_apply(quat_w.repeat(1, self.num_rays), self.ray_directions[env_ids])
        # ray cast and store the hits
        # TODO: Make this work for multiple meshes?
-        self._data.ray_hits_w[env_ids] = raycast_mesh(ray_starts_w, ray_directions_w, self.warp_meshes[0])
+        self._data.ray_hits_w[env_ids] = raycast_mesh(
+            ray_starts_w,
+            ray_directions_w,
+            mesh=RayCaster.meshes[self.cfg.mesh_prim_paths[0]],
+        )[0]
    def _set_debug_vis_impl(self, debug_vis: bool):
        # set visibility of markers

--- a/source/extensions/omni.isaac.orbit/omni/isaac/orbit/sensors/ray_caster/ray_caster_camera.py
+++ b/source/extensions/omni.isaac.orbit/omni/isaac/orbit/sensors/ray_caster/ray_caster_camera.py
--- a/source/extensions/omni.isaac.orbit/omni/isaac/orbit/sensors/ray_caster/ray_caster_camera_cfg.py
+++ b/source/extensions/omni.isaac.orbit/omni/isaac/orbit/sensors/ray_caster/ray_caster_camera_cfg.py
+# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES, ETH Zurich, and University of Toronto
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+"""Configuration for the ray-cast sensor."""
+from __future__ import annotations
+from typing_extensions import Literal
+from omni.isaac.orbit.utils import configclass
+from .ray_caster_camera import RayCasterCamera
+from .ray_caster_cfg import RayCasterCfg
+@configclass
+class RayCasterCameraCfg(RayCasterCfg):
+    """Configuration for the ray-cast sensor."""
+    @configclass
+    class OffsetCfg:
+        """The offset pose of the sensor's frame from the sensor's parent frame."""
+        pos: tuple[float, float, float] = (0.0, 0.0, 0.0)
+        """Translation w.r.t. the parent frame. Defaults to (0.0, 0.0, 0.0)."""
+        rot: tuple[float, float, float, float] = (1.0, 0.0, 0.0, 0.0)
+        """Quaternion rotation ``(w, x, y, z)`` w.r.t. the parent frame. Defaults to (1.0, 0.0, 0.0, 0.0)."""
+        convention: Literal["opengl", "ros", "world"] = "ros"
+        """The convention in which the frame offset is applied. Defaults to "ros".
+        - ``"opengl"`` - forward axis: ``-Z`` - up axis: ``+Y`` - Offset is applied in the OpenGL (Usd.Camera) convention.
+        - ``"ros"``    - forward axis: ``+Z`` - up axis: ``-Y`` - Offset is applied in the ROS convention.
+        - ``"world"``  - forward axis: ``+X`` - up axis: ``+Z`` - Offset is applied in the World Frame convention.
+        """
+    class_type: type = RayCasterCamera
+    offset: OffsetCfg = OffsetCfg()
+    """The offset pose of the sensor's frame from the sensor's parent frame. Defaults to identity."""
+    data_types: list[str] = ["distance_to_image_plane"]
+    """List of sensor names/types to enable for the camera. Defaults to ["distance_to_image_plane"]."""
+    def __post_init__(self):
+        # for cameras, this quantity should be False always.
+        self.attach_yaw_only = False
--- a/source/extensions/omni.isaac.orbit/omni/isaac/orbit/sim/spawners/sensors/sensors_cfg.py
+++ b/source/extensions/omni.isaac.orbit/omni/isaac/orbit/sim/spawners/sensors/sensors_cfg.py
@@ -33,8 +33,12 @@ class PinholeCameraCfg(SpawnerCfg):
    Note:
        Currently only "pinhole" is supported.
    """
-    clipping_range: tuple[float, float] = (1.0, 1e6)
+    clipping_range: tuple[float, float] = (0.01, 1e6)
-    """Near and far clipping distances (in m). Defaults to (1.0, 1e6)."""
+    """Near and far clipping distances (in m). Defaults to (0.01, 1e6).
+    The minimum clipping range will shift the camera forward by the specified distance. Don't set it too high to
+    avoid issues for distance related data types (e.g., ``distance_to_image_plane``).
+    """
    focal_length: float = 24.0
    """Perspective focal length (in cm). Defaults to 24.0cm.

--- a/source/extensions/omni.isaac.orbit/omni/isaac/orbit/utils/warp/__init__.py
+++ b/source/extensions/omni.isaac.orbit/omni/isaac/orbit/utils/warp/__init__.py
@@ -3,6 +3,7 @@
 #
 # SPDX-License-Identifier: BSD-3-Clause
+from __future__ import annotations
 """Operations based on warp."""

--- a/source/extensions/omni.isaac.orbit/omni/isaac/orbit/utils/warp/kernels.py
+++ b/source/extensions/omni.isaac.orbit/omni/isaac/orbit/utils/warp/kernels.py
@@ -15,6 +15,13 @@ def raycast_mesh_kernel(
    ray_starts: wp.array(dtype=wp.vec3),
    ray_directions: wp.array(dtype=wp.vec3),
    ray_hits: wp.array(dtype=wp.vec3),
+    ray_distance: wp.array(dtype=wp.float32),
+    ray_normal: wp.array(dtype=wp.vec3),
+    ray_face_id: wp.array(dtype=wp.int32),
+    max_dist: float = 1e6,
+    return_distance: int = False,
+    return_normal: int = False,
+    return_face_id: int = False,
 ):
    """Performs ray-casting against a mesh.
@@ -36,7 +43,6 @@ def raycast_mesh_kernel(
    # get the thread id
    tid = wp.tid()
-    max_dist = float(1e6)  # max ray-cast distance
    t = float(0.0)  # hit distance along ray
    u = float(0.0)  # hit face barycentric u
    v = float(0.0)  # hit face barycentric v
@@ -47,3 +53,9 @@ def raycast_mesh_kernel(
    # ray cast against the mesh and store the hit position
    if wp.mesh_query_ray(mesh, ray_starts[tid], ray_directions[tid], max_dist, t, u, v, sign, n, f):
        ray_hits[tid] = ray_starts[tid] + t * ray_directions[tid]
+        if return_distance == 1:
+            ray_distance[tid] = t
+        if return_normal == 1:
+            ray_normal[tid] = n
+        if return_face_id == 1:
+            ray_face_id[tid] = f
--- a/source/extensions/omni.isaac.orbit/omni/isaac/orbit/utils/warp/ops.py
+++ b/source/extensions/omni.isaac.orbit/omni/isaac/orbit/utils/warp/ops.py
@@ -14,58 +14,122 @@ import warp as wp
 from . import kernels
-def raycast_mesh(ray_starts: torch.Tensor, ray_directions: torch.Tensor, mesh: wp.Mesh) -> torch.Tensor:
+def raycast_mesh(
+    ray_starts: torch.Tensor,
+    ray_directions: torch.Tensor,
+    mesh: wp.Mesh,
+    max_dist: float = 1e6,
+    return_distance: bool = False,
+    return_normal: bool = False,
+    return_face_id: bool = False,
+) -> tuple[torch.Tensor, torch.Tensor | None, torch.Tensor | None, torch.Tensor | None]:
    """Performs ray-casting against a mesh.
    Note that the `ray_starts` and `ray_directions`, and `ray_hits` should have compatible shapes
    and data types to ensure proper execution. Additionally, they all must be in the same frame.
    Args:
-        ray_start: The starting position of the rays. Shape (N, 3).
+        ray_starts: The starting position of the rays. Shape (N, 3).
        ray_directions: The ray directions for each ray. Shape (N, 3).
        mesh: The warp mesh to ray-cast against.
+        max_dist: The maximum distance to ray-cast. Defaults to 1e6.
+        return_distance: Whether to return the distance of the ray until it hits the mesh. Defaults to False.
+        return_normal: Whether to return the normal of the mesh face the ray hits. Defaults to False.
+        return_face_id: Whether to return the face id of the mesh face the ray hits. Defaults to False.
    Returns:
-        The ray hit position. Shape (N, 3). The returned tensor contains :obj:`float('inf')` for missed hits.
+        The ray hit position. Shape (N, 3).
+            The returned tensor contains :obj:`float('inf')` for missed hits.
+        The ray hit distance. Shape (N,).
+            Will only return if `return_distance` is True, else returns None.
+            Always at second position of the output tuple.
+            The returned tensor contains :obj:`float('inf')` for missed hits.
+        The ray hit normal. Shape (N, 3).
+            Will only return if `return_normal` is True else returns None.
+            Always at third position of the output tuple.
+            The returned tensor contains :obj:`float('inf')` for missed hits.
+        The ray hit face id. Shape (N,).
+            Will only return if `return_face_id` is True else returns None.
+            Always at fourth position of the output tuple.
+            The returned tensor contains :obj:`int(-1)` for missed hits.
    """
    # extract device and shape information
    shape = ray_starts.shape
    device = ray_starts.device
    # device of the mesh
-    mesh_device = wp.device_to_torch(mesh.device)
+    torch_device = wp.device_to_torch(mesh.device)
    # reshape the tensors
-    ray_starts = ray_starts.to(mesh_device).view(-1, 3)
+    ray_starts = ray_starts.to(torch_device).view(-1, 3).contiguous()
-    ray_directions = ray_directions.to(mesh_device).view(-1, 3)
+    ray_directions = ray_directions.to(torch_device).view(-1, 3).contiguous()
    num_rays = ray_starts.shape[0]
    # create output tensor for the ray hits
-    ray_hits = torch.full((num_rays, 3), float("inf"), device=mesh_device)
+    ray_hits = torch.full((num_rays, 3), float("inf"), device=torch_device).contiguous()
    # map the memory to warp arrays
    ray_starts_wp = wp.from_torch(ray_starts, dtype=wp.vec3)
    ray_directions_wp = wp.from_torch(ray_directions, dtype=wp.vec3)
    ray_hits_wp = wp.from_torch(ray_hits, dtype=wp.vec3)
+    if return_distance:
+        ray_distance = torch.full((num_rays,), float("inf"), device=torch_device).contiguous()
+        ray_distance_wp = wp.from_torch(ray_distance, dtype=wp.float32)
+    else:
+        ray_distance = None
+        ray_distance_wp = wp.empty((1,), dtype=wp.float32, device=torch_device)
+    if return_normal:
+        ray_normal = torch.full((num_rays, 3), float("inf"), device=torch_device).contiguous()
+        ray_normal_wp = wp.from_torch(ray_normal, dtype=wp.vec3)
+    else:
+        ray_normal = None
+        ray_normal_wp = wp.empty((1,), dtype=wp.vec3, device=torch_device)
+    if return_face_id:
+        ray_face_id = torch.ones((num_rays,), dtype=torch.int32, device=torch_device).contiguous() * (-1)
+        ray_face_id_wp = wp.from_torch(ray_face_id, dtype=wp.int32)
+    else:
+        ray_face_id = None
+        ray_face_id_wp = wp.empty((1,), dtype=wp.int32, device=torch_device)
    # launch the warp kernel
    wp.launch(
        kernel=kernels.raycast_mesh_kernel,
        dim=num_rays,
-        inputs=[mesh.id, ray_starts_wp, ray_directions_wp, ray_hits_wp],
+        inputs=[
+            mesh.id,
+            ray_starts_wp,
+            ray_directions_wp,
+            ray_hits_wp,
+            ray_distance_wp,
+            ray_normal_wp,
+            ray_face_id_wp,
+            float(max_dist),
+            int(return_distance),
+            int(return_normal),
+            int(return_face_id),
+        ],
        device=mesh.device,
    )
    # NOTE: Synchronize is not needed anymore, but we keep it for now. Check with @dhoeller.
    wp.synchronize()
-    return ray_hits.to(device).view(shape)
+    if return_distance:
+        ray_distance = ray_distance.to(device).view(shape[0], shape[1])
+    if return_normal:
+        ray_normal = ray_normal.to(device).view(shape)
+    if return_face_id:
+        ray_face_id = ray_face_id.to(device).view(shape[0], shape[1])
+    return ray_hits.to(device).view(shape), ray_distance, ray_normal, ray_face_id
 def convert_to_warp_mesh(points: np.ndarray, indices: np.ndarray, device: str) -> wp.Mesh:
    """Create a warp mesh object with a mesh defined from vertices and triangles.
    Args:
-        points: The vertices of the mesh. Shape is :math:`(N, 3)`, where :math:`N`
+        points: The vertices of the mesh. Shape is (N, 3), where N is the number of vertices.
-            is the number of vertices.
        indices: The triangles of the mesh as references to vertices for each triangle.
-            Shape is :math:`(M, 3)`, where :math:`M` is the number of triangles / faces.
+            Shape is (M, 3), where M is the number of triangles / faces.
        device: The device to use for the mesh.
    Returns:

--- a/source/extensions/omni.isaac.orbit/test/sensors/test_camera.py
+++ b/source/extensions/omni.isaac.orbit/test/sensors/test_camera.py
@@ -23,6 +23,7 @@ import numpy as np
 import os
 import random
 import scipy.spatial.transform as tf
+import torch
 import traceback
 import unittest
@@ -246,7 +247,7 @@ class TestCamera(unittest.TestCase):
        # play sim
        self.sim.reset()
        # set new pose
-        camera.set_world_poses([POSITION], [QUAT_WORLD], convention="world")
+        camera.set_world_poses(torch.tensor([POSITION]), torch.tensor([QUAT_WORLD]), convention="world")
        np.testing.assert_allclose(camera.data.pos_w, [POSITION], rtol=1e-5)
        np.testing.assert_allclose(camera.data.quat_w_world, [QUAT_WORLD], rtol=1e-5)
@@ -256,7 +257,7 @@ class TestCamera(unittest.TestCase):
        # play sim
        self.sim.reset()
        # set new pose
-        camera.set_world_poses_from_view([POSITION], [[0.0, 0.0, 0.0]])
+        camera.set_world_poses_from_view(torch.tensor([POSITION]), torch.tensor([[0.0, 0.0, 0.0]]))
        np.testing.assert_allclose(camera.data.pos_w, [POSITION], rtol=1e-5)
        np.testing.assert_allclose(camera.data.quat_w_ros, [QUAT_ROS], rtol=1e-5)
@@ -308,7 +309,7 @@ class TestCamera(unittest.TestCase):
        # Play simulator
        self.sim.reset()
        # Set camera pose
-        camera.set_world_poses_from_view([2.5, 2.5, 2.5], [0.0, 0.0, 0.0])
+        camera.set_world_poses_from_view(torch.tensor([[2.5, 2.5, 2.5]]), torch.tensor([[0.0, 0.0, 0.0]]))
        # Simulate for a few steps
        # note: This is a workaround to ensure that the textures are loaded.
        #   Check "Known Issues" section in the documentation for more details.

--- a/source/extensions/omni.isaac.orbit/test/sensors/test_ray_caster_camera.py
+++ b/source/extensions/omni.isaac.orbit/test/sensors/test_ray_caster_camera.py
--- a/source/standalone/demo/play_camera.py
+++ b/source/standalone/demo/play_camera.py
@@ -20,8 +20,9 @@ from omni.isaac.orbit.app import AppLauncher
 # add argparse arguments
 parser = argparse.ArgumentParser(description="This script demonstrates how to use the camera sensor.")
-parser.add_argument("--gpu", action="store_true", default=False, help="Use GPU device for camera rendering output.")
+parser.add_argument("--cpu", action="store_true", default=False, help="Use CPU device for camera output.")
 parser.add_argument("--draw", action="store_true", default=False, help="Draw the obtained pointcloud on viewport.")
+parser.add_argument("--save", action="store_true", default=False, help="Save the obtained data to disk.")
 # append AppLauncher cli args
 AppLauncher.add_app_launcher_args(parser)
 # parse the arguments
@@ -45,8 +46,6 @@ import omni.isaac.core.utils.prims as prim_utils
 import omni.isaac.debug_draw._debug_draw as omni_debug_draw
 import omni.replicator.core as rep
 from omni.isaac.core.prims import RigidPrim
-from omni.isaac.core.simulation_context import SimulationContext
-from omni.isaac.core.utils.viewports import set_camera_view
 from pxr import Gf, UsdGeom
 import omni.isaac.orbit.sim as sim_utils
@@ -54,13 +53,19 @@ from omni.isaac.orbit.sensors.camera import Camera, CameraCfg
 from omni.isaac.orbit.utils import convert_dict_to_backend
 from omni.isaac.orbit.utils.math import project_points, transform_points, unproject_depth
-"""
-Helpers
-"""
+def main():
+    """Main function."""
+    # Load kit helper
+    sim_cfg = sim_utils.SimulationCfg(device="cpu" if args_cli.cpu else "cuda")
+    sim = sim_utils.SimulationContext(sim_cfg)
+    # Set main camera
+    sim.set_camera_view([2.5, 2.5, 2.5], [0.0, 0.0, 0.0])
+    # Acquire draw interface
+    draw_interface = omni_debug_draw.acquire_debug_draw_interface()
-def design_scene():
+    # Populate scene
-    """Add prims to the scene."""
    # Spawn things into stage
    # Ground-plane
    cfg = sim_utils.GroundPlaneCfg()
@@ -94,26 +99,6 @@ def design_scene():
        geom_prim.CreateDisplayColorAttr()
        geom_prim.GetDisplayColorAttr().Set([color])
-"""
-Main
-"""
-def main():
-    """Runs a camera sensor from orbit."""
-    # Load kit helper
-    sim = SimulationContext(
-        physics_dt=0.005, rendering_dt=0.005, backend="torch", device="cuda" if args_cli.gpu else "cpu"
-    )
-    # Set main camera
-    set_camera_view([2.5, 2.5, 2.5], [0.0, 0.0, 0.0])
-    # Acquire draw interface
-    draw_interface = omni_debug_draw.acquire_debug_draw_interface()
-    # Populate scene
-    design_scene()
    # Setup camera sensor
    camera_cfg = CameraCfg(
        prim_path="/World/CameraSensor_.*/Cam",
@@ -140,12 +125,12 @@ def main():
    # Set pose: There are two ways to set the pose of the camera.
    # -- Option-1: Set pose using view
-    eyes = [[2.5, 2.5, 2.5], [-2.5, -2.5, 2.5]]
+    eyes = torch.tensor([[2.5, 2.5, 2.5], [-2.5, -2.5, 2.5]], device=sim.device)
-    targets = [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0]]
+    targets = torch.tensor([[0.0, 0.0, 0.0], [0.0, 0.0, 0.0]], device=sim.device)
    camera.set_world_poses_from_view(eyes, targets)
    # -- Option-2: Set pose using ROS
-    # position = [[2.5, 2.5, 2.5]]
+    # position = torch.tensor([[2.5, 2.5, 2.5]], device=sim.device)
-    # orientation = [[-0.17591989, 0.33985114, 0.82047325, -0.42470819]]
+    # orientation = torch.tensor([[-0.17591989, 0.33985114, 0.82047325, -0.42470819]], device=sim.device)
    # camera.set_world_pose_ros(position, orientation, indices=[0])
    # Simulate for a few steps
@@ -168,44 +153,48 @@ def main():
        print("-------------------------------")
        # Extract camera data
-        camera_index = 1
+        if args_cli.save:
-        # note: BasicWriter only supports saving data in numpy format, so we need to convert the data to numpy.
+            # Save images from camera 1
-        if sim.backend == "torch":
+            camera_index = 1
-            # tensordict allows easy indexing of tensors in the dictionary
+            # note: BasicWriter only supports saving data in numpy format, so we need to convert the data to numpy.
-            single_cam_data = convert_dict_to_backend(camera.data.output[camera_index], backend="numpy")
+            if sim.backend == "torch":
-        else:
+                # tensordict allows easy indexing of tensors in the dictionary
-            # for numpy, we need to manually index the data
+                single_cam_data = convert_dict_to_backend(camera.data.output[camera_index], backend="numpy")
-            single_cam_data = dict()
-            for key, value in camera.data.output.items():
-                single_cam_data[key] = value[camera_index]
-        # Extract the other information
-        single_cam_info = camera.data.info[camera_index]
-        # Pack data back into replicator format to save them using its writer
-        rep_output = dict()
-        for key, data, info in zip(single_cam_data.keys(), single_cam_data.values(), single_cam_info.values()):
-            if info is not None:
-                rep_output[key] = {"data": data, "info": info}
            else:
-                rep_output[key] = data
+                # for numpy, we need to manually index the data
-        # Save images
+                single_cam_data = dict()
-        rep_output["trigger_outputs"] = {"on_time": camera.frame[camera_index]}
+                for key, value in camera.data.output.items():
-        rep_writer.write(rep_output)
+                    single_cam_data[key] = value[camera_index]
+            # Extract the other information
-        # Pointcloud in world frame
+            single_cam_info = camera.data.info[camera_index]
-        points_3d_cam = unproject_depth(camera.data.output["distance_to_image_plane"], camera.data.intrinsic_matrices)
-        points_3d_world = transform_points(points_3d_cam, camera.data.pos_w, camera.data.quat_w_ros)
+            # Pack data back into replicator format to save them using its writer
+            rep_output = dict()
-        # Check methods are valid
+            for key, data, info in zip(single_cam_data.keys(), single_cam_data.values(), single_cam_info.values()):
-        im_height, im_width = camera.image_shape
+                if info is not None:
-        # -- project points to (u, v, d)
+                    rep_output[key] = {"data": data, "info": info}
-        reproj_points = project_points(points_3d_cam, camera.data.intrinsic_matrices)
+                else:
-        reproj_depths = reproj_points[..., -1].view(-1, im_width, im_height).transpose_(1, 2)
+                    rep_output[key] = data
-        sim_depths = camera.data.output["distance_to_image_plane"].squeeze(-1)
+            # Save images
-        torch.testing.assert_allclose(reproj_depths, sim_depths)
+            rep_output["trigger_outputs"] = {"on_time": camera.frame[camera_index]}
+            rep_writer.write(rep_output)
        # Draw pointcloud
        if not args_cli.headless and args_cli.draw:
+            # Pointcloud in world frame
+            points_3d_cam = unproject_depth(
+                camera.data.output["distance_to_image_plane"], camera.data.intrinsic_matrices
+            )
+            points_3d_world = transform_points(points_3d_cam, camera.data.pos_w, camera.data.quat_w_ros)
+            # Check methods are valid
+            im_height, im_width = camera.image_shape
+            # -- project points to (u, v, d)
+            reproj_points = project_points(points_3d_cam, camera.data.intrinsic_matrices)
+            reproj_depths = reproj_points[..., -1].view(-1, im_width, im_height).transpose_(1, 2)
+            sim_depths = camera.data.output["distance_to_image_plane"].squeeze(-1)
+            torch.testing.assert_allclose(reproj_depths, sim_depths)
            # Convert to numpy for visualization
            if not isinstance(points_3d_world, np.ndarray):
                points_3d_world = points_3d_world.cpu().numpy()

--- a/source/standalone/demo/play_ray_caster_camera.py
+++ b/source/standalone/demo/play_ray_caster_camera.py
+# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES, ETH Zurich, and University of Toronto
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+"""
+This script shows how to use the ray-cast camera sensor from the Orbit framework.
+The camera sensor is based on using Warp kernels which do ray-casting against static meshes.
+"""
+"""Launch Isaac Sim Simulator first."""
+import argparse
+# omni-isaac-orbit
+from omni.isaac.orbit.app import AppLauncher
+# add argparse arguments
+parser = argparse.ArgumentParser(description="This script demonstrates how to use the ray-cast camera sensor.")
+parser.add_argument("--headless", action="store_true", default=False, help="Force display off at all times.")
+parser.add_argument("--num_envs", type=int, default=16, help="Number of environments to generate.")
+parser.add_argument("--save", type=int, default=16, help="Number of environments to generate.")
+args_cli = parser.parse_args()
+# launch omniverse app
+app_launcher = AppLauncher(headless=args_cli.headless)
+simulation_app = app_launcher.app
+"""Rest everything follows."""
+import os
+import torch
+import traceback
+import carb
+import omni.isaac.core.utils.prims as prim_utils
+import omni.replicator.core as rep
+import omni.isaac.orbit.sim as sim_utils
+import omni.isaac.orbit.terrains as terrain_gen
+from omni.isaac.orbit.sensors.ray_caster import RayCasterCamera, RayCasterCameraCfg, patterns
+from omni.isaac.orbit.terrains.config.rough import ROUGH_TERRAINS_CFG
+from omni.isaac.orbit.terrains.terrain_importer import TerrainImporter
+from omni.isaac.orbit.utils import convert_dict_to_backend
+from omni.isaac.orbit.utils.math import project_points, unproject_depth
+def main():
+    """Main function."""
+    # Load kit helper
+    sim = sim_utils.SimulationContext()
+    # Set main camera
+    sim.set_camera_view([2.5, 2.5, 3.5], [0.0, 0.0, 0.0])
+    # Populate scene
+    # Handler for terrains importing
+    terrain_importer_cfg = terrain_gen.TerrainImporterCfg(
+        num_envs=args_cli.num_envs,
+        env_spacing=3.0,
+        prim_path="/World/ground",
+        max_init_terrain_level=None,
+        terrain_type="generator",
+        terrain_generator=ROUGH_TERRAINS_CFG.replace(curriculum=True, num_rows=4, num_cols=4),
+        debug_vis=True,
+    )
+    terrain = TerrainImporter(terrain_importer_cfg)
+    # Lights
+    cfg = sim_utils.DistantLightCfg(intensity=600.0, color=(0.75, 0.75, 0.75))
+    cfg.func("/World/Light", cfg)
+    # Camera base frames
+    prim_utils.create_prim("/World/envs", "Scope")
+    for index in range(args_cli.num_envs):
+        prim_utils.create_prim(f"/World/envs/env_{index}", "Xform", translation=terrain.env_origins[index])
+        prim_utils.create_prim(f"/World/envs/env_{index}/Camera", "Xform")
+    # Setup camera sensor
+    camera_cfg = RayCasterCameraCfg(
+        prim_path="/World/envs/env_.*/Camera",
+        mesh_prim_paths=["/World/ground"],
+        update_period=0,
+        offset=RayCasterCameraCfg.OffsetCfg(pos=(0.0, 0.0, 0.0), rot=(1.0, 0.0, 0.0, 0.0)),
+        data_types=["distance_to_image_plane", "normals", "distance_to_camera"],
+        debug_vis=True,
+        pattern_cfg=patterns.PinholeCameraPatternCfg(
+            focal_length=24.0,
+            horizontal_aperture=20.955,
+            height=480,
+            width=640,
+        ),
+    )
+    # create xform because placement of camera directly under world is not supported
+    prim_utils.create_prim("/World/Camera", "Xform")
+    # Create camera
+    camera = RayCasterCamera(cfg=camera_cfg)
+    # Create replicator writer
+    output_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), "output", "ray_caster_camera")
+    rep_writer = rep.BasicWriter(output_dir=output_dir, frame_padding=3)
+    # Play simulator
+    sim.play()
+    # Set pose: There are two ways to set the pose of the camera.
+    # -- Option-1: Set pose using view
+    eyes = torch.tensor([[-1.0, 0, 2] * args_cli.num_envs], device=sim.device).reshape(-1, 3)
+    targets = torch.tensor([[0.0, 0.0, 0.0] * args_cli.num_envs], device=sim.device).reshape(-1, 3)
+    camera.set_world_poses_from_view(eyes + terrain.env_origins, targets + terrain.env_origins)
+    # -- Option-2: Set pose using ROS
+    # position = torch.tensor([[2.5, 2.5, 2.5]], device=sim.device)
+    # orientation = torch.tensor([[-0.17591989, 0.33985114, 0.82047325, -0.42470819]], device=sim.device)
+    # camera.set_world_pose_ros(position, orientation, indices=[0])
+    # Simulate physics
+    while simulation_app.is_running():
+        # Step simulation
+        sim.step()
+        # Update camera data
+        camera.update(dt=sim.get_physics_dt())
+        # Print camera info
+        print(camera)
+        print("Received shape of depth image: ", camera.data.output["distance_to_image_plane"].shape)
+        print("-------------------------------")
+        # Extract camera data
+        if args_cli.save:
+            # Extract camera data
+            camera_index = 0
+            # note: BasicWriter only supports saving data in numpy format, so we need to convert the data to numpy.
+            if sim.backend == "torch":
+                # tensordict allows easy indexing of tensors in the dictionary
+                single_cam_data = convert_dict_to_backend(camera.data.output[camera_index], backend="numpy")
+            else:
+                # for numpy, we need to manually index the data
+                single_cam_data = dict()
+                for key, value in camera.data.output.items():
+                    single_cam_data[key] = value[camera_index]
+            # Extract the other information
+            single_cam_info = camera.data.info[camera_index]
+            # Pack data back into replicator format to save them using its writer
+            rep_output = dict()
+            for key, data, info in zip(single_cam_data.keys(), single_cam_data.values(), single_cam_info.values()):
+                if info is not None:
+                    rep_output[key] = {"data": data, "info": info}
+                else:
+                    rep_output[key] = data
+            # Save images
+            rep_output["trigger_outputs"] = {"on_time": camera.frame[camera_index]}
+            rep_writer.write(rep_output)
+            # Pointcloud in world frame
+            points_3d_cam = unproject_depth(
+                camera.data.output["distance_to_image_plane"], camera.data.intrinsic_matrices
+            )
+            # Check methods are valid
+            im_height, im_width = camera.image_shape
+            # -- project points to (u, v, d)
+            reproj_points = project_points(points_3d_cam, camera.data.intrinsic_matrices)
+            reproj_depths = reproj_points[..., -1].view(-1, im_width, im_height).transpose_(1, 2)
+            sim_depths = camera.data.output["distance_to_image_plane"].squeeze(-1)
+            torch.testing.assert_allclose(reproj_depths, sim_depths)
+if __name__ == "__main__":
+    try:
+        # run the main execution
+        main()
+    except Exception as err:
+        carb.log_error(err)
+        carb.log_error(traceback.format_exc())
+        raise
+    finally:
+        # close sim app
+        simulation_app.close()