Adds raycasting-based camera implementation (#159)

This MR adds the `RayCasterCamera` as a ray-cast-based camera for "distance_to_camera", "distance_to_image_plane" and "normals" annotations. It has the same interface and functionalities as the USD Camera wrapper, while it is, on average, 30% faster. Fixes #131 - Bug fix (non-breaking change which fixes an issue) - New feature (non-breaking change which adds functionality) - This change requires a documentation update - [x] I have run the [`pre-commit` checks](https://pre-commit.com/) with `./orbit.sh --format` - [ ] I have made corresponding changes to the documentation - [x] My changes generate no new warnings - [x] I have added tests that prove my fix is effective or that my feature works - [x] I have updated the changelog and the corresponding version in the extension's `config/extension.toml` file --------- Signed-off-by: Pascal Roth <57946385+pascal-roth@users.noreply.github.com> Co-authored-by: Mayank Mittal <mittalma@leggedrobotics.com>

Adds raycasting-based camera implementation (#159)
This MR adds the `RayCasterCamera` as a ray-cast-based camera for "distance_to_camera", "distance_to_image_plane" and "normals" annotations. It has the same interface and functionalities as the USD Camera wrapper, while it is, on average, 30% faster. Fixes #131 - Bug fix (non-breaking change which fixes an issue) - New feature (non-breaking change which adds functionality) - This change requires a documentation update - [x] I have run the [`pre-commit` checks](https://pre-commit.com/) with `./orbit.sh --format` - [ ] I have made corresponding changes to the documentation - [x] My changes generate no new warnings - [x] I have added tests that prove my fix is effective or that my feature works - [x] I have updated the changelog and the corresponding version in the extension's `config/extension.toml` file --------- Signed-off-by: Pascal Roth <57946385+pascal-roth@users.noreply.github.com> Co-authored-by: Mayank Mittal <mittalma@leggedrobotics.com>
5c3fa70f · Pascal Roth · Mayank Mittal · e7d43de3 · 5c3fa70f · 5c3fa70f
Commit 5c3fa70f authored Nov 01, 2023 by Pascal Roth Committed by Mayank Mittal Nov 01, 2023
20 changed files
--- a/source/extensions/omni.isaac.orbit/config/extension.toml
+++ b/source/extensions/omni.isaac.orbit/config/extension.toml
 [package]
 # Note: Semantic Versioning is used: https://semver.org/
-version = "0.9.30"
+version = "0.9.31"
 # Description
 title = "ORBIT framework for Robot Learning"

--- a/source/extensions/omni.isaac.orbit/docs/CHANGELOG.rst
+++ b/source/extensions/omni.isaac.orbit/docs/CHANGELOG.rst
 Changelog
 ---------
+0.9.31 (2023-11-02)
+~~~~~~~~~~~~~~~~~~~
+Added
+^^^^^
+* Added the :class:`omni.isaac.orbit.sensors.RayCasterCamera` class, as a ray-casting based camera for
+  "distance_to_camera", "distance_to_image_plane" and "normals" annotations. It has the same interface and
+  functionalities as the USD Camera while it is on average 30% faster.
 0.9.30 (2023-11-01)
 ~~~~~~~~~~~~~~~~~~~
@@ -155,7 +166,6 @@ Added
 0.9.18 (2023-10-23)
-~~~~~~~~~~~~~~~~~~~
 Added
 ^^^^^

--- a/source/extensions/omni.isaac.orbit/omni/isaac/orbit/markers/config/__init__.py
+++ b/source/extensions/omni.isaac.orbit/omni/isaac/orbit/markers/config/__init__.py
@@ -19,7 +19,7 @@ RAY_CASTER_MARKER_CFG = VisualizationMarkersCfg(
            radius=0.02,
            visual_material=sim_utils.PreviewSurfaceCfg(diffuse_color=(1.0, 0.0, 0.0)),
        ),
-    }
+    },
 )
 """Configuration for the ray-caster marker."""

--- a/source/extensions/omni.isaac.orbit/omni/isaac/orbit/sensors/camera/camera.py
+++ b/source/extensions/omni.isaac.orbit/omni/isaac/orbit/sensors/camera/camera.py
@@ -13,12 +13,10 @@ from typing import TYPE_CHECKING, Any, Sequence
 from typing_extensions import Literal
 import omni.isaac.core.utils.prims as prim_utils
-import omni.isaac.core.utils.stage as stage_utils
 import omni.kit.commands
 import omni.usd
 from omni.isaac.core.prims import XFormPrimView
-from omni.isaac.core.utils.rotations import gf_quat_to_np_array
+from pxr import Usd, UsdGeom
-from pxr import Gf, Usd, UsdGeom
 # omni-isaac-orbit
 from omni.isaac.orbit.utils import to_camel_case
@@ -27,7 +25,7 @@ from omni.isaac.orbit.utils.math import quat_from_matrix
 from ..sensor_base import SensorBase
 from .camera_data import CameraData
-from .utils import convert_orientation_convention
+from .utils import convert_orientation_convention, create_rotation_matrix_from_view
 if TYPE_CHECKING:
    from .camera_cfg import CameraCfg
@@ -66,6 +64,8 @@ class Camera(SensorBase):
    cfg: CameraCfg
    """The configuration parameters."""
+    UNSUPPORTED_TYPES: set[str] = {"bounding_box_2d_tight", "bounding_box_2d_loose", "bounding_box_3d"}
+    """The set of sensor types that are not supported by the camera class."""
    def __init__(self, cfg: CameraCfg):
        """Initializes the camera sensor.
@@ -101,8 +101,7 @@ class Camera(SensorBase):
        self._data = CameraData()
        # check if there is any intersection in unsupported types
        # reason: these use np structured data types which we can't yet convert to torch tensor
-        unsupported_types = {"bounding_box_2d_tight", "bounding_box_2d_loose", "bounding_box_3d"}
+        common_elements = set(self.cfg.data_types) & Camera.UNSUPPORTED_TYPES
-        common_elements = set(self.cfg.data_types) & unsupported_types
        if common_elements:
            raise ValueError(
                f"Camera class does not support the following sensor types: {common_elements}."
@@ -167,7 +166,7 @@ class Camera(SensorBase):
    """
    def set_intrinsic_matrices(
-        self, matrices: torch.Tensor, focal_length: float = 1.0, indices: Sequence[int] | None = None
+        self, matrices: torch.Tensor, focal_length: float = 1.0, env_ids: Sequence[int] | None = None
    ):
        """Set parameters of the USD camera from its intrinsic matrix.
@@ -186,20 +185,15 @@ class Camera(SensorBase):
            is not true in the input intrinsic matrix, then the camera will not set up correctly.
        Args:
-            matrices: The intrinsic matrices for the camera. Shape is :math:`(N, 3, 3)`.
+            matrices: The intrinsic matrices for the camera. Shape is (N, 3, 3).
            focal_length: Focal length to use when computing aperture values. Defaults to 1.0.
-            indices: A list of indices of length :obj:`N` to specify the prims to manipulate.
+            env_ids: A sensor ids to manipulate. Defaults to None, which means all sensor indices.
-                Defaults to None, which means all prims will be manipulated.
        """
-        # resolve indices
+        # resolve env_ids
-        # check camera prim exists
+        if env_ids is None:
-        if not self._is_initialized:
+            env_ids = self._ALL_INDICES
-            raise RuntimeError("Camera is not initialized. Please call 'sim.play()' first.")
+        # iterate over env_ids
-        # resolve indices
+        for i, matrix in zip(env_ids, matrices):
-        if indices is None:
-            indices = self._ALL_INDICES
-        # iterate over indices
-        for i, matrix in zip(indices, matrices):
            # convert to numpy for sanity
            intrinsic_matrix = np.asarray(matrix, dtype=float)
            # extract parameters from matrix
@@ -240,7 +234,7 @@ class Camera(SensorBase):
        self,
        positions: torch.Tensor | None = None,
        orientations: torch.Tensor | None = None,
-        indices: Sequence[int] | None = None,
+        env_ids: Sequence[int] | None = None,
        convention: Literal["opengl", "ros", "world"] = "ros",
    ):
        r"""Set the pose of the camera w.r.t. the world frame using specified convention.
@@ -248,32 +242,27 @@ class Camera(SensorBase):
        Since different fields use different conventions for camera orientations, the method allows users to
        set the camera poses in the specified convention. Possible conventions are:
-        - :obj:"opengl" - forward axis: -Z - up axis +Y - Offset is applied in the OpenGL (Usd.Camera) convention
+        - :obj:`"opengl"` - forward axis: -Z - up axis +Y - Offset is applied in the OpenGL (Usd.Camera) convention
-        - :obj:"ros"    - forward axis: +Z - up axis -Y - Offset is applied in the ROS convention
+        - :obj:`"ros"`    - forward axis: +Z - up axis -Y - Offset is applied in the ROS convention
-        - :obj:"world"  - forward axis: +X - up axis +Z - Offset is applied in the World Frame convention
+        - :obj:`"world"`  - forward axis: +X - up axis +Z - Offset is applied in the World Frame convention
        See :meth:`omni.isaac.orbit.sensors.camera.utils.convert_orientation_convention` for more details
        on the conventions.
        Args:
-            positions: The cartesian coordinates (in meters). Shape is :math:`(N, 3)`.
+            positions: The cartesian coordinates (in meters). Shape is (N, 3).
                Defaults to None, in which case the camera position in not changed.
-            orientations: The quaternion orientation in (w, x, y, z). Shape is :math:`(N, 4)`.
+            orientations: The quaternion orientation in (w, x, y, z). Shape is (N, 4).
                Defaults to None, in which case the camera orientation in not changed.
-            indices: A list of indices of length :obj:`N` to specify the prims to manipulate.
+            env_ids: A sensor ids to manipulate. Defaults to None, which means all sensor indices.
-                Defaults to None, which means all prims will be manipulated.
+            convention: The convention in which the poses are fed. Defaults to "ros".
-            convention: The convention in which the poses are fed.
-                Defaults to "ros".
        Raises:
            RuntimeError: If the camera prim is not set. Need to call :meth:`initialize` method first.
        """
-        # check camera prim exists
+        # resolve env_ids
-        if not self._is_initialized:
+        if env_ids is None:
-            raise RuntimeError("Camera is not initialized. Please call 'sim.play()' first.")
+            env_ids = self._ALL_INDICES
-        # resolve indices
-        if indices is None:
-            indices = self._ALL_INDICES
        # convert to backend tensor
        if positions is not None:
            if isinstance(positions, np.ndarray):
@@ -288,71 +277,28 @@ class Camera(SensorBase):
                orientations = torch.tensor(orientations, device=self._device)
            orientations = convert_orientation_convention(orientations, origin=convention, target="opengl")
        # set the pose
-        self._view.set_world_poses(positions, orientations, indices)
+        self._view.set_world_poses(positions, orientations, env_ids)
    def set_world_poses_from_view(
-        self, eyes: torch.Tensor, targets: torch.Tensor, indices: Sequence[int] | None = None
+        self, eyes: torch.Tensor, targets: torch.Tensor, env_ids: Sequence[int] | None = None
    ):
        """Set the poses of the camera from the eye position and look-at target position.
        Args:
-            eyes: The positions of the camera's eye. Shape is :math:`(N, 3)`.
+            eyes: The positions of the camera's eye. Shape is (N, 3).
-            targets: The target locations to look at. Shape is :math:`(N, 3)`.
+            targets: The target locations to look at. Shape is (N, 3).
-            indices: A list of indices of length :math:`N` to specify the prims to manipulate.
+            env_ids: A sensor ids to manipulate. Defaults to None, which means all sensor indices.
-                Defaults to None, which means all prims will be manipulated.
        Raises:
            RuntimeError: If the camera prim is not set. Need to call :meth:`initialize` method first.
            NotImplementedError: If the stage up-axis is not "Y" or "Z".
        """
-        # check camera prim exists
+        # resolve env_ids
-        if not self._is_initialized:
+        if env_ids is None:
-            raise RuntimeError("Camera is not initialized. Please 'sim.play()' first.")
+            env_ids = self._ALL_INDICES
-        # resolve indices
-        if indices is None:
-            indices = self._ALL_INDICES
-        # create tensors for storing poses
-        positions = torch.zeros((len(indices), 3), device=self._device)
-        orientations = torch.zeros((len(indices), 4), device=self._device)
-        # iterate over all indices
-        # TODO: Can we do this in parallel?
-        for i, eye, target in zip(indices, eyes, targets):
-            # compute camera's eye pose
-            eye_position = Gf.Vec3d(np.asarray(eye).tolist())
-            target_position = Gf.Vec3d(np.asarray(target).tolist())
-            # compute forward direction
-            forward_dir = (eye_position - target_position).GetNormalized()
-            # get up axis
-            up_axis_token = stage_utils.get_stage_up_axis()
-            if up_axis_token == UsdGeom.Tokens.y:
-                # deal with degenerate case
-                if forward_dir == Gf.Vec3d(0, 1, 0):
-                    up_axis = Gf.Vec3d(0, 0, 1)
-                elif forward_dir == Gf.Vec3d(0, -1, 0):
-                    up_axis = Gf.Vec3d(0, 0, -1)
-                else:
-                    up_axis = Gf.Vec3d(0, 1, 0)
-            elif up_axis_token == UsdGeom.Tokens.z:
-                # deal with degenerate case
-                if forward_dir == Gf.Vec3d(0, 0, 1):
-                    up_axis = Gf.Vec3d(0, 1, 0)
-                elif forward_dir == Gf.Vec3d(0, 0, -1):
-                    up_axis = Gf.Vec3d(0, -1, 0)
-                else:
-                    up_axis = Gf.Vec3d(0, 0, 1)
-            else:
-                raise NotImplementedError(f"This method is not supported for up-axis '{up_axis_token}'.")
-            # compute matrix transformation
-            # view matrix: camera_T_world
-            matrix_gf = Gf.Matrix4d(1).SetLookAt(eye_position, target_position, up_axis)
-            # camera position and rotation in world frame
-            matrix_gf = matrix_gf.GetInverse()
-            positions[i] = torch.from_numpy(np.asarray(matrix_gf.ExtractTranslation())).to(device=self._device)
-            orientations[i] = torch.from_numpy(gf_quat_to_np_array(matrix_gf.ExtractRotationQuat())).to(
-                device=self._device
-            )
        # set camera poses using the view
-        self._view.set_world_poses(positions, orientations, indices)
+        orientations = quat_from_matrix(create_rotation_matrix_from_view(eyes, targets, device=self._device))
+        self._view.set_world_poses(eyes, orientations, env_ids)
    """
    Operations
@@ -401,7 +347,7 @@ class Camera(SensorBase):
                f" the number of environments ({self._num_envs})."
            )
-        # Create all indices buffer
+        # Create all env_ids buffer
        self._ALL_INDICES = torch.arange(self._view.count, device=self._device, dtype=torch.long)
        # Create frame count buffer
        self._frame = torch.zeros(self._view.count, device=self._device, dtype=torch.long)
@@ -455,9 +401,6 @@ class Camera(SensorBase):
        self._create_buffers()
    def _update_buffers_impl(self, env_ids: Sequence[int]):
-        # check camera prim exists
-        if not self._is_initialized:
-            raise RuntimeError("Camera is not initialized. Please call 'sim.play()' first.")
        # Increment frame count
        self._frame[env_ids] += 1
        # -- intrinsic matrix
@@ -493,9 +436,7 @@ class Camera(SensorBase):
        # create the data object
        # -- pose of the cameras
        self._data.pos_w = torch.zeros((self._view.count, 3), device=self._device)
-        self._data.quat_w_ros = torch.zeros((self._view.count, 4), device=self._device)
+        self._data.quat_w_world = torch.zeros((self._view.count, 4), device=self._device)
-        self._data.quat_w_world = torch.zeros_like(self._data.quat_w_ros)
-        self._data.quat_w_opengl = torch.zeros_like(self._data.quat_w_ros)
        # -- intrinsic matrix
        self._data.intrinsic_matrices = torch.zeros((self._view.count, 3, 3), device=self._device)
        self._data.image_shape = self.image_shape
@@ -564,13 +505,9 @@ class Camera(SensorBase):
        # extract the position (convert it to SI units-- assumed that stage units is 1.0)
        self._data.pos_w[env_ids] = torch.tensor(prim_tf_all[:, 0:3, 3], device=self._device, dtype=torch.float32)
-        # save opengl convention
+        # save quat in world convention
        quat_opengl = quat_from_matrix(torch.tensor(prim_tf_all[:, 0:3, 0:3], device=self._device, dtype=torch.float32))
-        self._data.quat_w_opengl[env_ids] = quat_opengl
-        # save world and ros convention
        self._data.quat_w_world[env_ids] = convert_orientation_convention(quat_opengl, origin="opengl", target="world")
-        self._data.quat_w_ros[env_ids] = convert_orientation_convention(quat_opengl, origin="opengl", target="ros")
    def _create_annotator_data(self):
        """Create the buffers to store the annotator data.

--- a/source/extensions/omni.isaac.orbit/omni/isaac/orbit/sensors/camera/camera_data.py
+++ b/source/extensions/omni.isaac.orbit/omni/isaac/orbit/sensors/camera/camera_data.py
@@ -10,6 +10,8 @@ from dataclasses import dataclass
 from tensordict import TensorDict
 from typing import Any
+from .utils import convert_orientation_convention
 @dataclass
 class CameraData:
@@ -25,15 +27,6 @@ class CameraData:
    Shape is (N, 3) where ``N`` is the number of sensors.
    """
-    quat_w_ros: torch.Tensor = None
-    """Quaternion orientation `(w, x, y, z)` of the sensor origin in world frame, following ROS convention.
-    .. note::
-        ROS convention follows the camera aligned with forward axis +Z and up axis -Y.
-    Shape is (N, 4) where ``N`` is the number of sensors.
-    """
    quat_w_world: torch.Tensor = None
    """Quaternion orientation `(w, x, y, z)` of the sensor origin in world frame, following the world coordinate frame
@@ -43,15 +36,6 @@ class CameraData:
    Shape is ``(N, 4)`` where ``N`` is the number of sensors.
    """
-    quat_w_opengl: torch.Tensor = None
-    """Quaternion orientation `(w, x, y, z)` of the sensor origin in world frame, following Opengl / Usd.Camera convention
-    .. note::
-        OpenGL convention follows the camera aligned with forward axis -Z and up axis +Y.
-    Shape is ``(N, 4)`` where ``N`` is the number of sensors.
-    """
    ##
    # Camera data
    ##
@@ -81,3 +65,30 @@ class CameraData:
    For semantic-based data, this corresponds to the ``"info"`` key in the output of the sensor. For other sensor
    types, the info is empty.
    """
+    ##
+    # Additional Frame orientation conventions
+    ##
+    @property
+    def quat_w_ros(self) -> torch.Tensor:
+        """Quaternion orientation `(w, x, y, z)` of the sensor origin in the world frame, following ROS convention.
+        .. note::
+            ROS convention follows the camera aligned with forward axis +Z and up axis -Y.
+        Shape is (N, 4) where ``N`` is the number of sensors.
+        """
+        return convert_orientation_convention(self.quat_w_world, origin="world", target="ros")
+    @property
+    def quat_w_opengl(self) -> torch.Tensor:
+        """Quaternion orientation `(w, x, y, z)` of the sensor origin in the world frame, following
+        Opengl / USD Camera convention.
+        .. note::
+            OpenGL convention follows the camera aligned with forward axis -Z and up axis +Y.
+        Shape is ``(N, 4)`` where ``N`` is the number of sensors.
+        """
+        return convert_orientation_convention(self.quat_w_world, origin="world", target="opengl")
--- a/source/extensions/omni.isaac.orbit/omni/isaac/orbit/sensors/camera/utils.py
+++ b/source/extensions/omni.isaac.orbit/omni/isaac/orbit/sensors/camera/utils.py
@@ -9,10 +9,13 @@ from __future__ import annotations
 import math
 import numpy as np
 import torch
+import torch.nn.functional as F
 from typing import Sequence
 from typing_extensions import Literal
+import omni.isaac.core.utils.stage as stage_utils
 import warp as wp
+from pxr import UsdGeom
 import omni.isaac.orbit.utils.math as math_utils
 from omni.isaac.orbit.utils.array import TensorData, convert_to_torch
@@ -22,6 +25,7 @@ __all__ = [
    "create_pointcloud_from_depth",
    "create_pointcloud_from_rgbd",
    "convert_orientation_convention",
+    "create_rotation_matrix_from_view",
 ]
@@ -292,9 +296,9 @@ def convert_orientation_convention(
    Possible conventions are:
-    - :obj:"opengl" - forward axis: -Z - up axis +Y - Offset is applied in the OpenGL (Usd.Camera) convention
+    - :obj:`"opengl"` - forward axis: -Z - up axis +Y - Offset is applied in the OpenGL (Usd.Camera) convention
-    - :obj:"ros"    - forward axis: +Z - up axis -Y - Offset is applied in the ROS convention
+    - :obj:`"ros"`    - forward axis: +Z - up axis -Y - Offset is applied in the ROS convention
-    - :obj:"world"  - forward axis: +X - up axis +Z - Offset is applied in the World Frame convention
+    - :obj:`"world"`  - forward axis: +X - up axis +Z - Offset is applied in the World Frame convention
    Args:
        orientation: Quaternion of form `(w, x, y, z)` with shape (..., 4) in source convention
@@ -348,3 +352,53 @@ def convert_orientation_convention(
        return math_utils.quat_from_matrix(rotm)
    else:
        return quat_gl.clone()
+# @torch.jit.script
+def create_rotation_matrix_from_view(
+    eyes: torch.Tensor,
+    targets: torch.Tensor,
+    device: str = "cpu",
+) -> torch.Tensor:
+    """
+    This function takes a vector ''eyes'' which specifies the location
+    of the camera in world coordinates and the vector ''targets'' which
+    indicate the position of the object.
+    The output is a rotation matrix representing the transformation
+    from world coordinates -> view coordinates.
+        The inputs camera_position and targets can each be a
+        - 3 element tuple/list
+        - torch tensor of shape (1, 3)
+        - torch tensor of shape (N, 3)
+    Args:
+        eyes: position of the camera in world coordinates
+        targets: position of the object in world coordinates
+    The vectors are broadcast against each other so they all have shape (N, 3).
+    Returns:
+        R: (N, 3, 3) batched rotation matrices
+    Reference:
+    Based on PyTorch3D (https://github.com/facebookresearch/pytorch3d/blob/eaf0709d6af0025fe94d1ee7cec454bc3054826a/pytorch3d/renderer/cameras.py#L1635-L1685)
+    """
+    up_axis_token = stage_utils.get_stage_up_axis()
+    if up_axis_token == UsdGeom.Tokens.y:
+        up_axis = torch.tensor((0, 1, 0), device=device, dtype=torch.float32).repeat(eyes.shape[0], 1)
+    elif up_axis_token == UsdGeom.Tokens.z:
+        up_axis = torch.tensor((0, 0, 1), device=device, dtype=torch.float32).repeat(eyes.shape[0], 1)
+    else:
+        raise ValueError(f"Invalid up axis: {up_axis_token}")
+    # get rotation matrix in opengl format (-Z forward, +Y up)
+    z_axis = -F.normalize(targets - eyes, eps=1e-5)
+    x_axis = F.normalize(torch.cross(up_axis, z_axis, dim=1), eps=1e-5)
+    y_axis = F.normalize(torch.cross(z_axis, x_axis, dim=1), eps=1e-5)
+    is_close = torch.isclose(x_axis, torch.tensor(0.0), atol=5e-3).all(dim=1, keepdim=True)
+    if is_close.any():
+        replacement = F.normalize(torch.cross(y_axis, z_axis, dim=1), eps=1e-5)
+        x_axis = torch.where(is_close, replacement, x_axis)
+    R = torch.cat((x_axis[:, None, :], y_axis[:, None, :], z_axis[:, None, :]), dim=1)
+    return R.transpose(1, 2)
--- a/source/extensions/omni.isaac.orbit/omni/isaac/orbit/sensors/ray_caster/__init__.py
+++ b/source/extensions/omni.isaac.orbit/omni/isaac/orbit/sensors/ray_caster/__init__.py
@@ -11,14 +11,19 @@ from __future__ import annotations
 from . import patterns
 from .ray_caster import RayCaster
+from .ray_caster_camera import RayCasterCamera
+from .ray_caster_camera_cfg import RayCasterCameraCfg
 from .ray_caster_cfg import RayCasterCfg
 from .ray_caster_data import RayCasterData
 __all__ = [
-    # sensor
+    # ray caster
    "RayCaster",
    "RayCasterData",
    "RayCasterCfg",
+    # camera
+    "RayCasterCameraCfg",
+    "RayCasterCamera",
    # patterns
    "patterns",
 ]
--- a/source/extensions/omni.isaac.orbit/omni/isaac/orbit/sensors/ray_caster/patterns/patterns.py
+++ b/source/extensions/omni.isaac.orbit/omni/isaac/orbit/sensors/ray_caster/patterns/patterns.py
@@ -6,7 +6,6 @@
 from __future__ import annotations
-import numpy as np
 import torch
 from typing import TYPE_CHECKING
@@ -41,26 +40,46 @@ def grid_pattern(cfg: patterns_cfg.GridPatternCfg, device: str) -> tuple[torch.T
    return ray_starts, ray_directions
-def pinhole_camera_pattern(cfg: patterns_cfg.PinholeCameraPatternCfg, device: str) -> tuple[torch.Tensor, torch.Tensor]:
+def pinhole_camera_pattern(
-    """The depth-image pattern for ray casting.
+    cfg: patterns_cfg.PinholeCameraPatternCfg, intrinsic_matrices: torch.Tensor, device: str
+) -> tuple[torch.Tensor, torch.Tensor]:
+    """The image pattern for ray casting.
+    .. caution::
+        This function does not follow the standard pattern interface. It requires the intrinsic matrices
+        of the cameras to be passed in. This is because we want to be able to randomize the intrinsic
+        matrices of the cameras, which is not possible with the standard pattern interface.
    Args:
        cfg: The configuration instance for the pattern.
+        intrinsic_matrices: The intrinsic matrices of the cameras. Shape is (N, 3, 3).
        device: The device to create the pattern on.
    Returns:
-        The starting positions and directions of the rays.
+        The starting positions and directions of the rays. The shape of the tensors are
+        (N, H * W, 3) and (N, H * W, 3) respectively.
    """
-    x_grid = torch.full((cfg.height, cfg.width), cfg.far_plane, device=device)
+    # get image plane mesh grid
-    y_range = np.tan(np.deg2rad(cfg.horizontal_fov) / 2.0) * cfg.far_plane
+    grid = torch.meshgrid(
-    y = torch.linspace(y_range, -y_range, cfg.width, device=device)
+        torch.arange(start=0, end=cfg.width, dtype=torch.int32, device=device),
-    z_range = y_range * cfg.height / cfg.width
+        torch.arange(start=0, end=cfg.height, dtype=torch.int32, device=device),
-    z = torch.linspace(z_range, -z_range, cfg.height, device=device)
+        indexing="xy",
-    y_grid, z_grid = torch.meshgrid(y, z, indexing="xy")
+    )
+    pixels = torch.vstack(list(map(torch.ravel, grid))).T
-    ray_directions = torch.cat([x_grid.unsqueeze(2), y_grid.unsqueeze(2), z_grid.unsqueeze(2)], dim=2)
+    # convert to homogeneous coordinate system
-    ray_directions = torch.nn.functional.normalize(ray_directions, p=2.0, dim=-1).view(-1, 3)
+    pixels = torch.hstack([pixels, torch.ones((len(pixels), 1), device=device)])
-    ray_starts = torch.zeros_like(ray_directions)
+    # get pixel coordinates in camera frame
+    pix_in_cam_frame = torch.matmul(torch.inverse(intrinsic_matrices), pixels.T)
+    # robotics camera frame is (x forward, y left, z up) from camera frame with (x right, y down, z forward)
+    # transform to robotics camera frame
+    transform_vec = torch.tensor([1, -1, -1], device=device).unsqueeze(0).unsqueeze(2)
+    pix_in_cam_frame = pix_in_cam_frame[:, [2, 0, 1], :] * transform_vec
+    # normalize ray directions
+    ray_directions = (pix_in_cam_frame / torch.norm(pix_in_cam_frame, dim=1, keepdim=True)).permute(0, 2, 1)
+    # for camera, we always ray-cast from the sensor's origin
+    ray_starts = torch.zeros_like(ray_directions, device=device)
    return ray_starts, ray_directions

--- a/source/extensions/omni.isaac.orbit/omni/isaac/orbit/sensors/ray_caster/patterns/patterns_cfg.py
+++ b/source/extensions/omni.isaac.orbit/omni/isaac/orbit/sensors/ray_caster/patterns/patterns_cfg.py
@@ -52,14 +52,27 @@ class PinholeCameraPatternCfg(PatternBaseCfg):
    func = patterns.pinhole_camera_pattern
-    horizontal_fov: float = MISSING
+    focal_length: float = 24.0
-    """Horizontal field of view (in degrees)."""
+    """Perspective focal length (in cm). Defaults to 24.0cm.
+    Longer lens lengths narrower FOV, shorter lens lengths wider FOV.
+    """
+    horizontal_aperture: float = 20.955
+    """Horizontal aperture (in mm). Defaults to 20.955mm.
+    Emulates sensor/film width on a camera.
+    Note:
+        The default value is the horizontal aperture of a 35 mm spherical projector.
+    """
+    horizontal_aperture_offset: float = 0.0
+    """Offsets Resolution/Film gate horizontally. Defaults to 0.0."""
+    vertical_aperture_offset: float = 0.0
+    """Offsets Resolution/Film gate vertically. Defaults to 0.0."""
    width: int = MISSING
    """Width of the image (in pixels)."""
    height: int = MISSING
    """Height of the image (in pixels)."""
-    far_plane: float = MISSING
-    """Far plane of the image (in meters)."""
 @configclass

--- a/source/extensions/omni.isaac.orbit/omni/isaac/orbit/sensors/ray_caster/ray_caster.py
+++ b/source/extensions/omni.isaac.orbit/omni/isaac/orbit/sensors/ray_caster/ray_caster.py
@@ -8,10 +8,11 @@ from __future__ import annotations
 import numpy as np
 import torch
-from typing import TYPE_CHECKING, Sequence
+from typing import TYPE_CHECKING, ClassVar, Sequence
 import carb
 import omni.isaac.core.utils.prims as prim_utils
+import warp as wp
 from omni.isaac.core.articulations import ArticulationView
 from omni.isaac.core.prims import RigidPrimView, XFormPrimView
 from pxr import UsdGeom, UsdPhysics
@@ -47,6 +48,14 @@ class RayCaster(SensorBase):
    cfg: RayCasterCfg
    """The configuration parameters."""
+    meshes: ClassVar[dict[str, wp.Mesh]] = {}
+    """The warp meshes available for raycasting.
+    The keys correspond to the prim path for the meshes, and values are the corresponding warp Mesh objects.
+    Note:
+           We store a global dictionary of all warp meshes to prevent re-loading the mesh for different ray-cast sensor instances.
+    """
    def __init__(self, cfg: RayCasterCfg):
        """Initializes the ray-caster object.
@@ -58,8 +67,6 @@ class RayCaster(SensorBase):
        super().__init__(cfg)
        # Create empty variables for storing output data
        self._data = RayCasterData()
-        # List of meshes to ray-cast
-        self.warp_meshes = []
    def __str__(self) -> str:
        """Returns: A string containing information about the instance."""
@@ -67,7 +74,7 @@ class RayCaster(SensorBase):
            f"Ray-caster @ '{self.cfg.prim_path}': \n"
            f"\tview type            : {self._view.__class__}\n"
            f"\tupdate period (s)    : {self.cfg.update_period}\n"
-            f"\tnumber of meshes     : {len(self.warp_meshes)}\n"
+            f"\tnumber of meshes     : {len(RayCaster.meshes)}\n"
            f"\tnumber of sensors    : {self._view.count}\n"
            f"\tnumber of rays/sensor: {self.num_rays}\n"
            f"\ttotal number of rays : {self.num_rays * self._view.count}"
@@ -127,13 +134,24 @@ class RayCaster(SensorBase):
        self._view = prim_view_class(self.cfg.prim_path, reset_xform_properties=False)
        self._view.initialize()
+        # load the meshes by parsing the stage
+        self._initialize_warp_meshes()
+        # initialize the ray start and directions
+        self._initialize_rays_impl()
+    def _initialize_warp_meshes(self):
        # check number of mesh prims provided
        if len(self.cfg.mesh_prim_paths) != 1:
            raise NotImplementedError(
                f"RayCaster currently only supports one mesh prim. Received: {len(self.cfg.mesh_prim_paths)}"
            )
        # read prims to ray-cast
        for mesh_prim_path in self.cfg.mesh_prim_paths:
+            # check if mesh already casted into warp mesh
+            if mesh_prim_path in RayCaster.meshes:
+                continue
            # check if the prim is a plane - handle PhysX plane as a special case
            # if a plane exists then we need to create an infinite mesh that is a plane
            mesh_prim = prim_utils.get_first_matching_child_prim(
@@ -164,13 +182,15 @@ class RayCaster(SensorBase):
                # print info
                carb.log_info(f"Created infinite plane mesh prim: {mesh_prim.GetPath()}.")
            # add the warp mesh to the list
-            self.warp_meshes.append(wp_mesh)
+            RayCaster.meshes[mesh_prim_path] = wp_mesh
        # throw an error if no meshes are found
-        if len(self.warp_meshes) == 0:
+        if all([mesh_prim_path not in RayCaster.meshes for mesh_prim_path in self.cfg.mesh_prim_paths]):
            raise RuntimeError(
                f"No meshes found for ray-casting! Please check the mesh prim paths: {self.cfg.mesh_prim_paths}"
            )
+    def _initialize_rays_impl(self):
        # compute ray stars and directions
        self.ray_starts, self.ray_directions = self.cfg.pattern_cfg.func(self.cfg.pattern_cfg, self._device)
        self.num_rays = len(self.ray_directions)
@@ -210,7 +230,11 @@ class RayCaster(SensorBase):
            ray_directions_w = quat_apply(quat_w.repeat(1, self.num_rays), self.ray_directions[env_ids])
        # ray cast and store the hits
        # TODO: Make this work for multiple meshes?
-        self._data.ray_hits_w[env_ids] = raycast_mesh(ray_starts_w, ray_directions_w, self.warp_meshes[0])
+        self._data.ray_hits_w[env_ids] = raycast_mesh(
+            ray_starts_w,
+            ray_directions_w,
+            mesh=RayCaster.meshes[self.cfg.mesh_prim_paths[0]],
+        )[0]
    def _set_debug_vis_impl(self, debug_vis: bool):
        # set visibility of markers

--- a/source/extensions/omni.isaac.orbit/omni/isaac/orbit/sensors/ray_caster/ray_caster_camera.py
+++ b/source/extensions/omni.isaac.orbit/omni/isaac/orbit/sensors/ray_caster/ray_caster_camera.py
+# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES, ETH Zurich, and University of Toronto
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+from __future__ import annotations
+import torch
+from tensordict import TensorDict
+from typing import TYPE_CHECKING, ClassVar, Sequence
+from typing_extensions import Literal
+from omni.isaac.core.prims import XFormPrimView
+import omni.isaac.orbit.utils.math as math_utils
+from omni.isaac.orbit.sensors.camera import CameraData
+from omni.isaac.orbit.sensors.camera.utils import convert_orientation_convention, create_rotation_matrix_from_view
+from omni.isaac.orbit.utils.warp import raycast_mesh
+from .ray_caster import RayCaster
+if TYPE_CHECKING:
+    from .ray_caster_camera_cfg import RayCasterCameraCfg
+class RayCasterCamera(RayCaster):
+    """A ray-casting camera sensor.
+    The ray-caster camera uses a set of rays to get the distances to meshes in the scene. The rays are
+    defined in the sensor's local coordinate frame. The sensor has the same interface as the
+    :class:`omni.isaac.orbit.sensors.camera.Camera` that implements the camera class through USD camera prims.
+    However, this class provides a faster image generation. The sensor converts meshes from the list of
+    primitive paths provided in the configuration to Warp meshes. The camera then ray-casts against these
+    Warp meshes only.
+    Currently, only the following annotators are supported:
+    - ``"distance_to_camera"``: An image containing the distance to camera optical center.
+    - ``"distance_to_image_plane"``: An image containing distances of 3D points from camera plane along camera's z-axis.
+    - ``"normals"``: An image containing the local surface normal vectors at each pixel.
+    .. note::
+        Currently, only static meshes are supported. Extending the warp mesh to support dynamic meshes
+        is a work in progress.
+    """
+    cfg: RayCasterCameraCfg
+    """The configuration parameters."""
+    UNSUPPORTED_TYPES: ClassVar[set[str]] = {
+        "rgb",
+        "instance_id_segmentation",
+        "instance_segmentation",
+        "semantic_segmentation",
+        "skeleton_data",
+        "motion_vectors",
+        "bounding_box_2d_tight",
+        "bounding_box_2d_loose",
+        "bounding_box_3d",
+    }
+    """A set of sensor types that are not supported by the ray-caster camera."""
+    def __init__(self, cfg: RayCasterCameraCfg):
+        """Initializes the camera object.
+        Args:
+            cfg: The configuration parameters.
+        """
+        # initialize base class
+        super().__init__(cfg)
+        # Create empty variables for storing output data
+        self._data = CameraData()
+        # check if there is any intersection in unsupported types
+        # reason: we cannot obtain this data from simplified warp-based ray caster
+        common_elements = set(self.cfg.data_types) & RayCasterCamera.UNSUPPORTED_TYPES
+        if common_elements:
+            raise ValueError(
+                f"RayCasterCamera class does not support the following sensor types: {common_elements}."
+                "\n\tThis is because these sensor types cannot be obtained in a fast way using ''warp''."
+                "\n\tHint: If you need to work with these sensor types, we recommend using the USD camera"
+                " interface from the omni.isaac.orbit.sensors.camera module."
+            )
+    def __str__(self) -> str:
+        """Returns: A string containing information about the instance."""
+        return (
+            f"Ray-Caster-Camera @ '{self.cfg.prim_path}': \n"
+            f"\tview type            : {self._view.__class__}\n"
+            f"\tupdate period (s)    : {self.cfg.update_period}\n"
+            f"\tnumber of meshes     : {len(RayCaster.meshes)}\n"
+            f"\tnumber of sensors    : {self._view.count}\n"
+            f"\tnumber of rays/sensor: {self.num_rays}\n"
+            f"\ttotal number of rays : {self.num_rays * self._view.count}\n"
+            f"\timage shape          : {self.image_shape}"
+        )
+    """
+    Properties
+    """
+    @property
+    def data(self) -> CameraData:
+        # update sensors if needed
+        self._update_outdated_buffers()
+        # return the data
+        return self._data
+    @property
+    def image_shape(self) -> tuple[int, int]:
+        """A tuple containing (height, width) of the camera sensor."""
+        return (self.cfg.pattern_cfg.height, self.cfg.pattern_cfg.width)
+    @property
+    def frame(self) -> torch.tensor:
+        """Frame number when the measurement took place."""
+        return self._frame
+    """
+    Operations.
+    """
+    def set_intrinsic_matrices(
+        self, matrices: torch.Tensor, focal_length: float = 1.0, env_ids: Sequence[int] | None = None
+    ):
+        """Set the intrinsic matrix of the camera.
+        Args:
+            matrices: The intrinsic matrices for the camera. Shape is (N, 3, 3).
+            focal_length: Focal length to use when computing aperture values. Defaults to 1.0.
+            env_ids: A sensor ids to manipulate. Defaults to None, which means all sensor indices.
+        """
+        # resolve env_ids
+        if env_ids is None:
+            env_ids = slice(None)
+        # save new intrinsic matrices and focal length
+        self._data.intrinsic_matrices[env_ids] = matrices.to(self._device)
+        self._focal_length = focal_length
+        # recompute ray directions
+        self.ray_starts[env_ids], self.ray_directions[env_ids] = self.cfg.pattern_cfg.func(
+            self.cfg.pattern_cfg, self._data.intrinsic_matrices[env_ids], self._device
+        )
+    def reset(self, env_ids: Sequence[int] | None = None):
+        # reset the timestamps
+        super().reset(env_ids)
+        # resolve None
+        if env_ids is None:
+            env_ids = slice(None)
+        # reset the data
+        # note: this recomputation is useful if one performs randomization on the camera poses.
+        pos_w, quat_w = self._compute_camera_world_poses(env_ids)
+        self._data.pos_w[env_ids] = pos_w
+        self._data.quat_w_world[env_ids] = quat_w
+        # Set all reset sensors to not outdated since their value won't be updated till next sim step.
+        self._is_outdated[env_ids] = False
+        # Reset the frame count
+        self._frame[env_ids] = 0
+    def set_world_poses(
+        self,
+        positions: torch.Tensor | None = None,
+        orientations: torch.Tensor | None = None,
+        env_ids: Sequence[int] | None = None,
+        convention: Literal["opengl", "ros", "world"] = "ros",
+    ):
+        """Set the pose of the camera w.r.t. the world frame using specified convention.
+        Since different fields use different conventions for camera orientations, the method allows users to
+        set the camera poses in the specified convention. Possible conventions are:
+        - :obj:`"opengl"` - forward axis: -Z - up axis +Y - Offset is applied in the OpenGL (Usd.Camera) convention
+        - :obj:`"ros"`    - forward axis: +Z - up axis -Y - Offset is applied in the ROS convention
+        - :obj:`"world"`  - forward axis: +X - up axis +Z - Offset is applied in the World Frame convention
+        See :meth:`omni.isaac.orbit.sensors.camera.utils.convert_orientation_convention` for more details
+        on the conventions.
+        Args:
+            positions: The cartesian coordinates (in meters). Shape is (N, 3).
+                Defaults to None, in which case the camera position in not changed.
+            orientations: The quaternion orientation in (w, x, y, z). Shape is (N, 4).
+                Defaults to None, in which case the camera orientation in not changed.
+            env_ids: A sensor ids to manipulate. Defaults to None, which means all sensor indices.
+            convention: The convention in which the poses are fed. Defaults to "ros".
+        Raises:
+            RuntimeError: If the camera prim is not set. Need to call :meth:`initialize` method first.
+        """
+        # resolve env_ids
+        if env_ids is None:
+            env_ids = self._ALL_INDICES
+        # get current positions
+        pos_w, quat_w = self._compute_view_world_poses(env_ids)
+        if positions is not None:
+            # transform to camera frame
+            pos_offset_world_frame = positions - pos_w
+            self._offset_pos[env_ids] = math_utils.quat_apply(math_utils.quat_inv(quat_w), pos_offset_world_frame)
+        if orientations is not None:
+            # convert rotation matrix from input convention to world
+            quat_w_set = convert_orientation_convention(orientations, origin=convention, target="world")
+            self._offset_quat[env_ids] = math_utils.quat_mul(math_utils.quat_inv(quat_w), quat_w_set)
+        # update the data
+        pos_w, quat_w = self._compute_camera_world_poses(env_ids)
+        self._data.pos_w[env_ids] = pos_w
+        self._data.quat_w_world[env_ids] = quat_w
+    def set_world_poses_from_view(
+        self, eyes: torch.Tensor, targets: torch.Tensor, env_ids: Sequence[int] | None = None
+    ):
+        """Set the poses of the camera from the eye position and look-at target position.
+        Args:
+            eyes: The positions of the camera's eye. Shape is N, 3).
+            targets: The target locations to look at. Shape is (N, 3).
+            env_ids: A sensor ids to manipulate. Defaults to None, which means all sensor indices.
+        Raises:
+            RuntimeError: If the camera prim is not set. Need to call :meth:`initialize` method first.
+            NotImplementedError: If the stage up-axis is not "Y" or "Z".
+        """
+        # camera position and rotation in opengl convention
+        orientations = math_utils.quat_from_matrix(create_rotation_matrix_from_view(eyes, targets, device=self._device))
+        self.set_world_poses(eyes, orientations, env_ids, convention="opengl")
+    """
+    Implementation.
+    """
+    def _initialize_rays_impl(self):
+        # Create all indices buffer
+        self._ALL_INDICES = torch.arange(self._view.count, device=self._device, dtype=torch.long)
+        # Create frame count buffer
+        self._frame = torch.zeros(self._view.count, device=self._device, dtype=torch.long)
+        # create buffers
+        self._create_buffers()
+        # compute intrinsic matrices
+        self._compute_intrinsic_matrices()
+        # compute ray stars and directions
+        self.ray_starts, self.ray_directions = self.cfg.pattern_cfg.func(
+            self.cfg.pattern_cfg, self._data.intrinsic_matrices, self._device
+        )
+        self.num_rays = self.ray_directions.shape[1]
+        # create buffer to store ray hits
+        self.ray_hits_w = torch.zeros(self._view.count, self.num_rays, 3, device=self._device)
+        # set offsets
+        quat_w = convert_orientation_convention(
+            torch.tensor([self.cfg.offset.rot], device=self._device), origin=self.cfg.offset.convention, target="world"
+        )
+        self._offset_quat = quat_w.repeat(self._view.count, 1)
+        self._offset_pos = torch.tensor(list(self.cfg.offset.pos), device=self._device).repeat(self._view.count, 1)
+    def _update_buffers_impl(self, env_ids: Sequence[int]):
+        """Fills the buffers of the sensor data."""
+        # increment frame count
+        self._frame[env_ids] += 1
+        # compute poses from current view
+        pos_w, quat_w = self._compute_camera_world_poses(env_ids)
+        # update the data
+        self._data.pos_w[env_ids] = pos_w
+        self._data.quat_w_world[env_ids] = quat_w
+        # note: full orientation is considered
+        ray_starts_w = math_utils.quat_apply(quat_w.repeat(1, self.num_rays), self.ray_starts[env_ids])
+        ray_starts_w += pos_w.unsqueeze(1)
+        ray_directions_w = math_utils.quat_apply(quat_w.repeat(1, self.num_rays), self.ray_directions[env_ids])
+        # ray cast and store the hits
+        # TODO: Make ray-casting work for multiple meshes?
+        # necessary for regular dictionaries.
+        self.ray_hits_w, ray_depth, ray_normal, _ = raycast_mesh(
+            ray_starts_w,
+            ray_directions_w,
+            mesh=RayCasterCamera.meshes[self.cfg.mesh_prim_paths[0]],
+            max_dist=self.cfg.max_distance,
+            return_distance=any(
+                [name in self.cfg.data_types for name in ["distance_to_image_plane", "distance_to_camera"]]
+            ),
+            return_normal="normals" in self.cfg.data_types,
+        )
+        # update output buffers
+        if "distance_to_image_plane" in self.cfg.data_types:
+            # note: data is in camera frame so we only take the first component (z-axis of camera frame)
+            distance_to_image_plane = (
+                math_utils.quat_apply(
+                    math_utils.quat_inv(quat_w).repeat(1, self.num_rays),
+                    (ray_depth[:, :, None] * ray_directions_w),
+                )
+            )[:, :, 0]
+            self._data.output["distance_to_image_plane"][env_ids] = distance_to_image_plane.view(-1, *self.image_shape)
+        if "distance_to_camera" in self.cfg.data_types:
+            self._data.output["distance_to_camera"][env_ids] = ray_depth.view(-1, *self.image_shape)
+        if "normals" in self.cfg.data_types:
+            self._data.output["normals"][env_ids] = ray_normal.view(-1, *self.image_shape, 3)
+    def _debug_vis_callback(self, event):
+        # in case it crashes be safe
+        if not hasattr(self, "ray_hits_w"):
+            return
+        # show ray hit positions
+        self.ray_visualizer.visualize(self.ray_hits_w.view(-1, 3))
+    """
+    Private Helpers
+    """
+    def _create_buffers(self):
+        """Create buffers for storing data."""
+        # create the data object
+        # -- pose of the cameras
+        self._data.pos_w = torch.zeros((self._view.count, 3), device=self._device)
+        self._data.quat_w_world = torch.zeros((self._view.count, 4), device=self._device)
+        # -- intrinsic matrix
+        self._data.intrinsic_matrices = torch.zeros((self._view.count, 3, 3), device=self._device)
+        self._data.intrinsic_matrices[:, 2, 2] = 1.0
+        self._data.image_shape = self.image_shape
+        # -- output data
+        # create the buffers to store the annotator data.
+        self._data.output = TensorDict({}, batch_size=self._view.count, device=self.device)
+        self._data.info = [{name: None for name in self.cfg.data_types}] * self._view.count
+        for name in self.cfg.data_types:
+            if name in ["distance_to_image_plane", "distance_to_camera"]:
+                shape = (self.cfg.pattern_cfg.height, self.cfg.pattern_cfg.width)
+            elif name in ["normals"]:
+                shape = (self.cfg.pattern_cfg.height, self.cfg.pattern_cfg.width, 3)
+            else:
+                raise ValueError(f"Received unknown data type: {name}. Please check the configuration.")
+            # allocate tensor to store the data
+            self._data.output[name] = torch.zeros((self._view.count, *shape), device=self._device)
+    def _compute_intrinsic_matrices(self):
+        """Computes the intrinsic matrices for the camera based on the config provided."""
+        # get the sensor properties
+        pattern_cfg = self.cfg.pattern_cfg
+        # compute the intrinsic matrix
+        vertical_aperture = pattern_cfg.horizontal_aperture * pattern_cfg.height / pattern_cfg.width
+        f_x = pattern_cfg.width * pattern_cfg.focal_length / pattern_cfg.horizontal_aperture
+        f_y = pattern_cfg.height * pattern_cfg.focal_length / vertical_aperture
+        c_x = pattern_cfg.horizontal_aperture_offset * f_x + pattern_cfg.width / 2
+        c_y = pattern_cfg.vertical_aperture_offset * f_y + pattern_cfg.height / 2
+        # allocate the intrinsic matrices
+        self._data.intrinsic_matrices[:, 0, 0] = f_x
+        self._data.intrinsic_matrices[:, 0, 2] = c_x
+        self._data.intrinsic_matrices[:, 1, 1] = f_y
+        self._data.intrinsic_matrices[:, 1, 2] = c_y
+        # save focal length
+        self._focal_length = pattern_cfg.focal_length
+    def _compute_view_world_poses(self, env_ids: Sequence[int]) -> tuple[torch.Tensor, torch.Tensor]:
+        """Obtains the pose of the view the camera is attached to in the world frame.
+        Returns:
+            A tuple of the position (in meters) and quaternion (w, x, y, z).
+        """
+        # obtain the poses of the sensors
+        # note: clone arg doesn't exist for xform prim view so we need to do this manually
+        if isinstance(self._view, XFormPrimView):
+            pos_w_temp, quat_w_temp = self._view.get_world_poses(env_ids)
+            pos_w = pos_w_temp.clone()
+            quat_w = quat_w_temp.clone()
+        else:
+            pos_w, quat_w = self._view.get_world_poses(env_ids, clone=True)
+        return pos_w, quat_w
+    def _compute_camera_world_poses(self, env_ids: Sequence[int]) -> tuple[torch.Tensor, torch.Tensor]:
+        """Computes the pose of the camera in the world frame.
+        This function applies the offset pose to the pose of the view the camera is attached to.
+        Returns:
+            A tuple of the position (in meters) and quaternion (w, x, y, z) in "world" convention.
+        """
+        # get the pose of the view the camera is attached to
+        pos_w, quat_w = self._compute_view_world_poses(env_ids)
+        # apply offsets
+        # need to apply quat because offset relative to parent frame
+        pos_w += math_utils.quat_apply(quat_w, self._offset_pos[env_ids])
+        quat_w = math_utils.quat_mul(quat_w, self._offset_quat[env_ids])
+        return pos_w, quat_w
--- a/source/extensions/omni.isaac.orbit/omni/isaac/orbit/sensors/ray_caster/ray_caster_camera_cfg.py
+++ b/source/extensions/omni.isaac.orbit/omni/isaac/orbit/sensors/ray_caster/ray_caster_camera_cfg.py
+# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES, ETH Zurich, and University of Toronto
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+"""Configuration for the ray-cast sensor."""
+from __future__ import annotations
+from typing_extensions import Literal
+from omni.isaac.orbit.utils import configclass
+from .ray_caster_camera import RayCasterCamera
+from .ray_caster_cfg import RayCasterCfg
+@configclass
+class RayCasterCameraCfg(RayCasterCfg):
+    """Configuration for the ray-cast sensor."""
+    @configclass
+    class OffsetCfg:
+        """The offset pose of the sensor's frame from the sensor's parent frame."""
+        pos: tuple[float, float, float] = (0.0, 0.0, 0.0)
+        """Translation w.r.t. the parent frame. Defaults to (0.0, 0.0, 0.0)."""
+        rot: tuple[float, float, float, float] = (1.0, 0.0, 0.0, 0.0)
+        """Quaternion rotation ``(w, x, y, z)`` w.r.t. the parent frame. Defaults to (1.0, 0.0, 0.0, 0.0)."""
+        convention: Literal["opengl", "ros", "world"] = "ros"
+        """The convention in which the frame offset is applied. Defaults to "ros".
+        - ``"opengl"`` - forward axis: ``-Z`` - up axis: ``+Y`` - Offset is applied in the OpenGL (Usd.Camera) convention.
+        - ``"ros"``    - forward axis: ``+Z`` - up axis: ``-Y`` - Offset is applied in the ROS convention.
+        - ``"world"``  - forward axis: ``+X`` - up axis: ``+Z`` - Offset is applied in the World Frame convention.
+        """
+    class_type: type = RayCasterCamera
+    offset: OffsetCfg = OffsetCfg()
+    """The offset pose of the sensor's frame from the sensor's parent frame. Defaults to identity."""
+    data_types: list[str] = ["distance_to_image_plane"]
+    """List of sensor names/types to enable for the camera. Defaults to ["distance_to_image_plane"]."""
+    def __post_init__(self):
+        # for cameras, this quantity should be False always.
+        self.attach_yaw_only = False
--- a/source/extensions/omni.isaac.orbit/omni/isaac/orbit/sim/spawners/sensors/sensors_cfg.py
+++ b/source/extensions/omni.isaac.orbit/omni/isaac/orbit/sim/spawners/sensors/sensors_cfg.py
@@ -33,8 +33,12 @@ class PinholeCameraCfg(SpawnerCfg):
    Note:
        Currently only "pinhole" is supported.
    """
-    clipping_range: tuple[float, float] = (1.0, 1e6)
+    clipping_range: tuple[float, float] = (0.01, 1e6)
-    """Near and far clipping distances (in m). Defaults to (1.0, 1e6)."""
+    """Near and far clipping distances (in m). Defaults to (0.01, 1e6).
+    The minimum clipping range will shift the camera forward by the specified distance. Don't set it too high to
+    avoid issues for distance related data types (e.g., ``distance_to_image_plane``).
+    """
    focal_length: float = 24.0
    """Perspective focal length (in cm). Defaults to 24.0cm.

--- a/source/extensions/omni.isaac.orbit/omni/isaac/orbit/utils/warp/__init__.py
+++ b/source/extensions/omni.isaac.orbit/omni/isaac/orbit/utils/warp/__init__.py
@@ -3,6 +3,7 @@
 #
 # SPDX-License-Identifier: BSD-3-Clause
+from __future__ import annotations
 """Operations based on warp."""

--- a/source/extensions/omni.isaac.orbit/omni/isaac/orbit/utils/warp/kernels.py
+++ b/source/extensions/omni.isaac.orbit/omni/isaac/orbit/utils/warp/kernels.py
@@ -15,6 +15,13 @@ def raycast_mesh_kernel(
    ray_starts: wp.array(dtype=wp.vec3),
    ray_directions: wp.array(dtype=wp.vec3),
    ray_hits: wp.array(dtype=wp.vec3),
+    ray_distance: wp.array(dtype=wp.float32),
+    ray_normal: wp.array(dtype=wp.vec3),
+    ray_face_id: wp.array(dtype=wp.int32),
+    max_dist: float = 1e6,
+    return_distance: int = False,
+    return_normal: int = False,
+    return_face_id: int = False,
 ):
    """Performs ray-casting against a mesh.
@@ -36,7 +43,6 @@ def raycast_mesh_kernel(
    # get the thread id
    tid = wp.tid()
-    max_dist = float(1e6)  # max ray-cast distance
    t = float(0.0)  # hit distance along ray
    u = float(0.0)  # hit face barycentric u
    v = float(0.0)  # hit face barycentric v
@@ -47,3 +53,9 @@ def raycast_mesh_kernel(
    # ray cast against the mesh and store the hit position
    if wp.mesh_query_ray(mesh, ray_starts[tid], ray_directions[tid], max_dist, t, u, v, sign, n, f):
        ray_hits[tid] = ray_starts[tid] + t * ray_directions[tid]
+        if return_distance == 1:
+            ray_distance[tid] = t
+        if return_normal == 1:
+            ray_normal[tid] = n
+        if return_face_id == 1:
+            ray_face_id[tid] = f
--- a/source/extensions/omni.isaac.orbit/omni/isaac/orbit/utils/warp/ops.py
+++ b/source/extensions/omni.isaac.orbit/omni/isaac/orbit/utils/warp/ops.py
@@ -14,58 +14,122 @@ import warp as wp
 from . import kernels
-def raycast_mesh(ray_starts: torch.Tensor, ray_directions: torch.Tensor, mesh: wp.Mesh) -> torch.Tensor:
+def raycast_mesh(
+    ray_starts: torch.Tensor,
+    ray_directions: torch.Tensor,
+    mesh: wp.Mesh,
+    max_dist: float = 1e6,
+    return_distance: bool = False,
+    return_normal: bool = False,
+    return_face_id: bool = False,
+) -> tuple[torch.Tensor, torch.Tensor | None, torch.Tensor | None, torch.Tensor | None]:
    """Performs ray-casting against a mesh.
    Note that the `ray_starts` and `ray_directions`, and `ray_hits` should have compatible shapes
    and data types to ensure proper execution. Additionally, they all must be in the same frame.
    Args:
-        ray_start: The starting position of the rays. Shape (N, 3).
+        ray_starts: The starting position of the rays. Shape (N, 3).
        ray_directions: The ray directions for each ray. Shape (N, 3).
        mesh: The warp mesh to ray-cast against.
+        max_dist: The maximum distance to ray-cast. Defaults to 1e6.
+        return_distance: Whether to return the distance of the ray until it hits the mesh. Defaults to False.
+        return_normal: Whether to return the normal of the mesh face the ray hits. Defaults to False.
+        return_face_id: Whether to return the face id of the mesh face the ray hits. Defaults to False.
    Returns:
-        The ray hit position. Shape (N, 3). The returned tensor contains :obj:`float('inf')` for missed hits.
+        The ray hit position. Shape (N, 3).
+            The returned tensor contains :obj:`float('inf')` for missed hits.
+        The ray hit distance. Shape (N,).
+            Will only return if `return_distance` is True, else returns None.
+            Always at second position of the output tuple.
+            The returned tensor contains :obj:`float('inf')` for missed hits.
+        The ray hit normal. Shape (N, 3).
+            Will only return if `return_normal` is True else returns None.
+            Always at third position of the output tuple.
+            The returned tensor contains :obj:`float('inf')` for missed hits.
+        The ray hit face id. Shape (N,).
+            Will only return if `return_face_id` is True else returns None.
+            Always at fourth position of the output tuple.
+            The returned tensor contains :obj:`int(-1)` for missed hits.
    """
    # extract device and shape information
    shape = ray_starts.shape
    device = ray_starts.device
    # device of the mesh
-    mesh_device = wp.device_to_torch(mesh.device)
+    torch_device = wp.device_to_torch(mesh.device)
    # reshape the tensors
-    ray_starts = ray_starts.to(mesh_device).view(-1, 3)
+    ray_starts = ray_starts.to(torch_device).view(-1, 3).contiguous()
-    ray_directions = ray_directions.to(mesh_device).view(-1, 3)
+    ray_directions = ray_directions.to(torch_device).view(-1, 3).contiguous()
    num_rays = ray_starts.shape[0]
    # create output tensor for the ray hits
-    ray_hits = torch.full((num_rays, 3), float("inf"), device=mesh_device)
+    ray_hits = torch.full((num_rays, 3), float("inf"), device=torch_device).contiguous()
    # map the memory to warp arrays
    ray_starts_wp = wp.from_torch(ray_starts, dtype=wp.vec3)
    ray_directions_wp = wp.from_torch(ray_directions, dtype=wp.vec3)
    ray_hits_wp = wp.from_torch(ray_hits, dtype=wp.vec3)
+    if return_distance:
+        ray_distance = torch.full((num_rays,), float("inf"), device=torch_device).contiguous()
+        ray_distance_wp = wp.from_torch(ray_distance, dtype=wp.float32)
+    else:
+        ray_distance = None
+        ray_distance_wp = wp.empty((1,), dtype=wp.float32, device=torch_device)
+    if return_normal:
+        ray_normal = torch.full((num_rays, 3), float("inf"), device=torch_device).contiguous()
+        ray_normal_wp = wp.from_torch(ray_normal, dtype=wp.vec3)
+    else:
+        ray_normal = None
+        ray_normal_wp = wp.empty((1,), dtype=wp.vec3, device=torch_device)
+    if return_face_id:
+        ray_face_id = torch.ones((num_rays,), dtype=torch.int32, device=torch_device).contiguous() * (-1)
+        ray_face_id_wp = wp.from_torch(ray_face_id, dtype=wp.int32)
+    else:
+        ray_face_id = None
+        ray_face_id_wp = wp.empty((1,), dtype=wp.int32, device=torch_device)
    # launch the warp kernel
    wp.launch(
        kernel=kernels.raycast_mesh_kernel,
        dim=num_rays,
-        inputs=[mesh.id, ray_starts_wp, ray_directions_wp, ray_hits_wp],
+        inputs=[
+            mesh.id,
+            ray_starts_wp,
+            ray_directions_wp,
+            ray_hits_wp,
+            ray_distance_wp,
+            ray_normal_wp,
+            ray_face_id_wp,
+            float(max_dist),
+            int(return_distance),
+            int(return_normal),
+            int(return_face_id),
+        ],
        device=mesh.device,
    )
    # NOTE: Synchronize is not needed anymore, but we keep it for now. Check with @dhoeller.
    wp.synchronize()
-    return ray_hits.to(device).view(shape)
+    if return_distance:
+        ray_distance = ray_distance.to(device).view(shape[0], shape[1])
+    if return_normal:
+        ray_normal = ray_normal.to(device).view(shape)
+    if return_face_id:
+        ray_face_id = ray_face_id.to(device).view(shape[0], shape[1])
+    return ray_hits.to(device).view(shape), ray_distance, ray_normal, ray_face_id
 def convert_to_warp_mesh(points: np.ndarray, indices: np.ndarray, device: str) -> wp.Mesh:
    """Create a warp mesh object with a mesh defined from vertices and triangles.
    Args:
-        points: The vertices of the mesh. Shape is :math:`(N, 3)`, where :math:`N`
+        points: The vertices of the mesh. Shape is (N, 3), where N is the number of vertices.
-            is the number of vertices.
        indices: The triangles of the mesh as references to vertices for each triangle.
-            Shape is :math:`(M, 3)`, where :math:`M` is the number of triangles / faces.
+            Shape is (M, 3), where M is the number of triangles / faces.
        device: The device to use for the mesh.
    Returns:

--- a/source/extensions/omni.isaac.orbit/test/sensors/test_camera.py
+++ b/source/extensions/omni.isaac.orbit/test/sensors/test_camera.py
@@ -23,6 +23,7 @@ import numpy as np
 import os
 import random
 import scipy.spatial.transform as tf
+import torch
 import traceback
 import unittest
@@ -246,7 +247,7 @@ class TestCamera(unittest.TestCase):
        # play sim
        self.sim.reset()
        # set new pose
-        camera.set_world_poses([POSITION], [QUAT_WORLD], convention="world")
+        camera.set_world_poses(torch.tensor([POSITION]), torch.tensor([QUAT_WORLD]), convention="world")
        np.testing.assert_allclose(camera.data.pos_w, [POSITION], rtol=1e-5)
        np.testing.assert_allclose(camera.data.quat_w_world, [QUAT_WORLD], rtol=1e-5)
@@ -256,7 +257,7 @@ class TestCamera(unittest.TestCase):
        # play sim
        self.sim.reset()
        # set new pose
-        camera.set_world_poses_from_view([POSITION], [[0.0, 0.0, 0.0]])
+        camera.set_world_poses_from_view(torch.tensor([POSITION]), torch.tensor([[0.0, 0.0, 0.0]]))
        np.testing.assert_allclose(camera.data.pos_w, [POSITION], rtol=1e-5)
        np.testing.assert_allclose(camera.data.quat_w_ros, [QUAT_ROS], rtol=1e-5)
@@ -308,7 +309,7 @@ class TestCamera(unittest.TestCase):
        # Play simulator
        self.sim.reset()
        # Set camera pose
-        camera.set_world_poses_from_view([2.5, 2.5, 2.5], [0.0, 0.0, 0.0])
+        camera.set_world_poses_from_view(torch.tensor([[2.5, 2.5, 2.5]]), torch.tensor([[0.0, 0.0, 0.0]]))
        # Simulate for a few steps
        # note: This is a workaround to ensure that the textures are loaded.
        #   Check "Known Issues" section in the documentation for more details.

--- a/source/extensions/omni.isaac.orbit/test/sensors/test_ray_caster_camera.py
+++ b/source/extensions/omni.isaac.orbit/test/sensors/test_ray_caster_camera.py
+# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES, ETH Zurich, and University of Toronto
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+# ignore private usage of variables warning
+# pyright: reportPrivateUsage=none
+"""Launch Isaac Sim Simulator first."""
+from omni.isaac.orbit.app import AppLauncher
+# launch omniverse app
+app_launcher = AppLauncher(headless=True)
+simulation_app = app_launcher.app
+"""Rest everything follows."""
+import copy
+import numpy as np
+import os
+import torch
+import traceback
+import unittest
+import carb
+import omni.isaac.core.utils.prims as prim_utils
+import omni.isaac.core.utils.stage as stage_utils
+import omni.replicator.core as rep
+from omni.isaac.core.simulation_context import SimulationContext
+from pxr import Gf
+from omni.isaac.orbit.sensors.camera import Camera, CameraCfg
+from omni.isaac.orbit.sensors.ray_caster import RayCasterCamera, RayCasterCameraCfg, patterns
+from omni.isaac.orbit.sim import PinholeCameraCfg
+from omni.isaac.orbit.terrains.trimesh.utils import make_plane
+from omni.isaac.orbit.terrains.utils import create_prim_from_mesh
+from omni.isaac.orbit.utils import convert_dict_to_backend
+from omni.isaac.orbit.utils.timer import Timer
+# sample camera poses
+POSITION = [2.5, 2.5, 2.5]
+QUAT_ROS = [-0.17591989, 0.33985114, 0.82047325, -0.42470819]
+QUAT_OPENGL = [0.33985113, 0.17591988, 0.42470818, 0.82047324]
+QUAT_WORLD = [-0.3647052, -0.27984815, -0.1159169, 0.88047623]
+class TestWarpCamera(unittest.TestCase):
+    """Test for orbit camera sensor"""
+    """
+    Test Setup and Teardown
+    """
+    def setUp(self):
+        """Create a blank new stage for each test."""
+        camera_pattern_cfg = patterns.PinholeCameraPatternCfg(
+            focal_length=24.0,
+            horizontal_aperture=20.955,
+            height=480,
+            width=640,
+        )
+        self.camera_cfg = RayCasterCameraCfg(
+            prim_path="/World/Camera",
+            mesh_prim_paths=["/World/defaultGroundPlane"],
+            update_period=0,
+            offset=RayCasterCameraCfg.OffsetCfg(pos=(0.0, 0.0, 0.0), rot=(1.0, 0.0, 0.0, 0.0), convention="world"),
+            debug_vis=False,
+            pattern_cfg=camera_pattern_cfg,
+            data_types=[
+                "distance_to_image_plane",
+            ],
+        )
+        # Create a new stage
+        stage_utils.create_new_stage()
+        # create xform because placement of camera directly under world is not supported
+        prim_utils.create_prim("/World/Camera", "Xform")
+        # Simulation time-step
+        self.dt = 0.01
+        # Load kit helper
+        self.sim = SimulationContext(physics_dt=self.dt, rendering_dt=self.dt, backend="torch", device="cpu")
+        # Ground-plane
+        mesh = make_plane(size=(2e1, 2e1), height=0.0, center_zero=True)
+        create_prim_from_mesh("/World/defaultGroundPlane", mesh)
+        # load stage
+        stage_utils.update_stage()
+    def tearDown(self):
+        """Stops simulator after each test."""
+        # close all the opened viewport from before.
+        rep.vp_manager.destroy_hydra_textures("Replicator")
+        # stop simulation
+        # note: cannot use self.sim.stop() since it does one render step after stopping!! This doesn't make sense :(
+        self.sim._timeline.stop()
+        # clear the stage
+        self.sim.clear()
+        self.sim.clear_instance()
+    """
+    Tests
+    """
+    def test_camera_init(self):
+        """Test camera initialization."""
+        # Create camera
+        camera = RayCasterCamera(cfg=self.camera_cfg)
+        # Play sim
+        self.sim.reset()
+        # Check if camera is initialized
+        self.assertTrue(camera._is_initialized)
+        # Simulate for a few steps
+        # note: This is a workaround to ensure that the textures are loaded.
+        #   Check "Known Issues" section in the documentation for more details.
+        for _ in range(5):
+            self.sim.step()
+        # Check buffers that exists and have correct shapes
+        self.assertTrue(camera.data.pos_w.shape == (1, 3))
+        self.assertTrue(camera.data.quat_w_ros.shape == (1, 4))
+        self.assertTrue(camera.data.quat_w_world.shape == (1, 4))
+        self.assertTrue(camera.data.quat_w_opengl.shape == (1, 4))
+        self.assertTrue(camera.data.intrinsic_matrices.shape == (1, 3, 3))
+        self.assertTrue(
+            camera.data.image_shape == (self.camera_cfg.pattern_cfg.height, self.camera_cfg.pattern_cfg.width)
+        )
+        self.assertTrue(camera.data.info == [{self.camera_cfg.data_types[0]: None}])
+        # Simulate physics
+        for _ in range(10):
+            # perform rendering
+            self.sim.step()
+            # update camera
+            camera.update(self.dt)
+            # check image data
+            for im_data in camera.data.output.to_dict().values():
+                self.assertTrue(
+                    im_data.shape == (1, self.camera_cfg.pattern_cfg.height, self.camera_cfg.pattern_cfg.width)
+                )
+    def test_camera_resolution(self):
+        """Test camera resolution is correctly set."""
+        # Create camera
+        camera = RayCasterCamera(cfg=self.camera_cfg)
+        # Play sim
+        self.sim.reset()
+        # Simulate for a few steps
+        # note: This is a workaround to ensure that the textures are loaded.
+        #   Check "Known Issues" section in the documentation for more details.
+        for _ in range(5):
+            self.sim.step()
+        camera.update(self.dt)
+        # access image data and compare shapes
+        for im_data in camera.data.output.to_dict().values():
+            self.assertTrue(im_data.shape == (1, self.camera_cfg.pattern_cfg.height, self.camera_cfg.pattern_cfg.width))
+    def test_camera_init_offset(self):
+        """Test camera initialization with offset using different conventions."""
+        # define the same offset in all conventions
+        # -- ROS convention
+        cam_cfg_offset_ros = copy.deepcopy(self.camera_cfg)
+        cam_cfg_offset_ros.offset = RayCasterCameraCfg.OffsetCfg(
+            pos=POSITION,
+            rot=QUAT_ROS,
+            convention="ros",
+        )
+        prim_utils.create_prim("/World/CameraOffsetRos", "Xform")
+        cam_cfg_offset_ros.prim_path = "/World/CameraOffsetRos"
+        camera_ros = RayCasterCamera(cam_cfg_offset_ros)
+        # -- OpenGL convention
+        cam_cfg_offset_opengl = copy.deepcopy(self.camera_cfg)
+        cam_cfg_offset_opengl.offset = RayCasterCameraCfg.OffsetCfg(
+            pos=POSITION,
+            rot=QUAT_OPENGL,
+            convention="opengl",
+        )
+        prim_utils.create_prim("/World/CameraOffsetOpengl", "Xform")
+        cam_cfg_offset_opengl.prim_path = "/World/CameraOffsetOpengl"
+        camera_opengl = RayCasterCamera(cam_cfg_offset_opengl)
+        # -- World convention
+        cam_cfg_offset_world = copy.deepcopy(self.camera_cfg)
+        cam_cfg_offset_world.offset = RayCasterCameraCfg.OffsetCfg(
+            pos=POSITION,
+            rot=QUAT_WORLD,
+            convention="world",
+        )
+        prim_utils.create_prim("/World/CameraOffsetWorld", "Xform")
+        cam_cfg_offset_world.prim_path = "/World/CameraOffsetWorld"
+        camera_world = RayCasterCamera(cam_cfg_offset_world)
+        # play sim
+        self.sim.reset()
+        # update cameras
+        camera_world.update(self.dt)
+        camera_opengl.update(self.dt)
+        camera_ros.update(self.dt)
+        # check that all transforms are set correctly
+        np.testing.assert_allclose(camera_ros.data.pos_w[0].numpy(), cam_cfg_offset_ros.offset.pos)
+        np.testing.assert_allclose(camera_opengl.data.pos_w[0].numpy(), cam_cfg_offset_opengl.offset.pos)
+        np.testing.assert_allclose(camera_world.data.pos_w[0].numpy(), cam_cfg_offset_world.offset.pos)
+        # check if transform correctly set in output
+        np.testing.assert_allclose(camera_ros.data.pos_w[0], cam_cfg_offset_ros.offset.pos, rtol=1e-5)
+        np.testing.assert_allclose(camera_ros.data.quat_w_ros[0], QUAT_ROS, rtol=1e-5)
+        np.testing.assert_allclose(camera_ros.data.quat_w_opengl[0], QUAT_OPENGL, rtol=1e-5)
+        np.testing.assert_allclose(camera_ros.data.quat_w_world[0], QUAT_WORLD, rtol=1e-5)
+    def test_multi_camera_init(self):
+        """Test multi-camera initialization."""
+        # create two cameras with different prim paths
+        # -- camera 1
+        cam_cfg_1 = copy.deepcopy(self.camera_cfg)
+        cam_cfg_1.prim_path = "/World/Camera_1"
+        prim_utils.create_prim("/World/Camera_1", "Xform")
+        # Create camera
+        cam_1 = RayCasterCamera(cam_cfg_1)
+        # -- camera 2
+        cam_cfg_2 = copy.deepcopy(self.camera_cfg)
+        cam_cfg_2.prim_path = "/World/Camera_2"
+        prim_utils.create_prim("/World/Camera_2", "Xform")
+        cam_2 = RayCasterCamera(cam_cfg_2)
+        # check that the loaded meshes are equal
+        self.assertTrue(cam_1.meshes == cam_2.meshes)
+        # play sim
+        self.sim.reset()
+        # Simulate for a few steps
+        # note: This is a workaround to ensure that the textures are loaded.
+        #   Check "Known Issues" section in the documentation for more details.
+        for _ in range(5):
+            self.sim.step()
+        # Simulate physics
+        for _ in range(10):
+            # perform rendering
+            self.sim.step()
+            # update camera
+            cam_1.update(self.dt)
+            cam_2.update(self.dt)
+            # check image data
+            for cam in [cam_1, cam_2]:
+                for im_data in cam.data.output.to_dict().values():
+                    self.assertTrue(
+                        im_data.shape == (1, self.camera_cfg.pattern_cfg.height, self.camera_cfg.pattern_cfg.width)
+                    )
+    def test_camera_set_world_poses(self):
+        """Test camera function to set specific world pose."""
+        camera = RayCasterCamera(self.camera_cfg)
+        # play sim
+        self.sim.reset()
+        # set new pose
+        camera.set_world_poses(torch.tensor([POSITION]), torch.tensor([QUAT_WORLD]), convention="world")
+        np.testing.assert_allclose(camera.data.pos_w, [POSITION], rtol=1e-5)
+        np.testing.assert_allclose(camera.data.quat_w_world, [QUAT_WORLD], rtol=1e-5)
+    def test_camera_set_world_poses_from_view(self):
+        """Test camera function to set specific world pose from view."""
+        camera = RayCasterCamera(self.camera_cfg)
+        # play sim
+        self.sim.reset()
+        # set new pose
+        camera.set_world_poses_from_view(torch.tensor([POSITION]), torch.tensor([[0.0, 0.0, 0.0]]))
+        np.testing.assert_allclose(camera.data.pos_w, [POSITION], rtol=1e-5)
+        np.testing.assert_allclose(camera.data.quat_w_ros, [QUAT_ROS], rtol=1e-5)
+    def test_intrinsic_matrix(self):
+        """Checks that the camera's set and retrieve methods work for intrinsic matrix."""
+        camera_cfg = copy.deepcopy(self.camera_cfg)
+        camera_cfg.pattern_cfg.height = 240
+        camera_cfg.pattern_cfg.width = 320
+        camera = RayCasterCamera(camera_cfg)
+        # play sim
+        self.sim.reset()
+        # Desired properties (obtained from realsense camera at 320x240 resolution)
+        rs_intrinsic_matrix = [229.31640625, 0.0, 164.810546875, 0.0, 229.826171875, 122.1650390625, 0.0, 0.0, 1.0]
+        rs_intrinsic_matrix = torch.tensor(rs_intrinsic_matrix).reshape(3, 3).unsqueeze(0)
+        # Set matrix into simulator
+        camera.set_intrinsic_matrices(rs_intrinsic_matrix)
+        # Simulate for a few steps
+        # note: This is a workaround to ensure that the textures are loaded.
+        #   Check "Known Issues" section in the documentation for more details.
+        for _ in range(5):
+            self.sim.step()
+        # Simulate physics
+        for _ in range(10):
+            # perform rendering
+            self.sim.step()
+            # update camera
+            camera.update(self.dt)
+            # Check that matrix is correct
+            K = camera.data.intrinsic_matrices[0].numpy()
+            # TODO: This is not correctly setting all values in the matrix since the
+            #       vertical aperture and aperture offsets are not being set correctly
+            #       This is a bug in the simulator.
+            self.assertAlmostEqual(rs_intrinsic_matrix[0, 0, 0].numpy(), K[0, 0], 4)
+            # self.assertAlmostEqual(rs_intrinsic_matrix[1, 1], K[1, 1], 4)
+    def test_throughput(self):
+        """Checks that the single camera gets created properly with a rig."""
+        # Create directory temp dir to dump the results
+        file_dir = os.path.dirname(os.path.realpath(__file__))
+        temp_dir = os.path.join(file_dir, "output", "camera", "throughput")
+        os.makedirs(temp_dir, exist_ok=True)
+        # Create replicator writer
+        rep_writer = rep.BasicWriter(output_dir=temp_dir, frame_padding=3)
+        # create camera
+        camera_cfg = copy.deepcopy(self.camera_cfg)
+        camera_cfg.pattern_cfg.height = 480
+        camera_cfg.pattern_cfg.width = 640
+        camera = RayCasterCamera(camera_cfg)
+        # Play simulator
+        self.sim.reset()
+        # Set camera pose
+        camera.set_world_poses_from_view(torch.tensor([[2.5, 2.5, 2.5]]), torch.tensor([[0.0, 0.0, 0.0]]))
+        # Simulate for a few steps
+        # note: This is a workaround to ensure that the textures are loaded.
+        #   Check "Known Issues" section in the documentation for more details.
+        for _ in range(5):
+            self.sim.step()
+        # Simulate physics
+        for _ in range(5):
+            # perform rendering
+            self.sim.step()
+            # update camera
+            with Timer(f"Time taken for updating camera with shape {camera.image_shape}"):
+                camera.update(self.dt)
+            # Save images
+            with Timer(f"Time taken for writing data with shape {camera.image_shape}   "):
+                # Pack data back into replicator format to save them using its writer
+                rep_output = dict()
+                camera_data = convert_dict_to_backend(camera.data.output[0].to_dict(), backend="numpy")
+                for key, data, info in zip(camera_data.keys(), camera_data.values(), camera.data.info[0].values()):
+                    if info is not None:
+                        rep_output[key] = {"data": data, "info": info}
+                    else:
+                        rep_output[key] = data
+                # Save images
+                rep_output["trigger_outputs"] = {"on_time": camera.frame[0]}
+                rep_writer.write(rep_output)
+            print("----------------------------------------")
+            # Check image data
+            for im_data in camera.data.output.values():
+                self.assertTrue(im_data.shape == (1, camera_cfg.pattern_cfg.height, camera_cfg.pattern_cfg.width))
+    def test_output_equal_to_usdcamera(self):
+        camera_pattern_cfg = patterns.PinholeCameraPatternCfg(
+            focal_length=24.0,
+            horizontal_aperture=20.955,
+            height=240,
+            width=320,
+        )
+        prim_utils.create_prim("/World/Camera_warp", "Xform")
+        camera_cfg_warp = RayCasterCameraCfg(
+            prim_path="/World/Camera",
+            mesh_prim_paths=["/World/defaultGroundPlane"],
+            update_period=0,
+            offset=RayCasterCameraCfg.OffsetCfg(pos=(0.0, 0.0, 0.0), rot=(1.0, 0.0, 0.0, 0.0)),
+            debug_vis=False,
+            pattern_cfg=camera_pattern_cfg,
+            data_types=["distance_to_image_plane", "distance_to_camera", "normals"],
+        )
+        camera_warp = RayCasterCamera(camera_cfg_warp)
+        # create usd camera
+        camera_cfg_usd = CameraCfg(
+            height=240,
+            width=320,
+            prim_path="/World/Camera_usd",
+            update_period=0,
+            data_types=["distance_to_image_plane", "distance_to_camera", "normals"],
+            spawn=PinholeCameraCfg(
+                focal_length=24.0, focus_distance=400.0, horizontal_aperture=20.955, clipping_range=(1e-4, 1.0e5)
+            ),
+            colorize=False,
+        )
+        camera_usd = Camera(camera_cfg_usd)
+        # play sim
+        self.sim.reset()
+        self.sim.play()
+        # set views
+        camera_warp.set_world_poses_from_view(torch.tensor([[2.5, 2.5, 4.5]]), torch.tensor([[0.0, 0.0, 0.0]]))
+        camera_usd.set_world_poses_from_view(torch.tensor([[2.5, 2.5, 4.5]]), torch.tensor([[0.0, 0.0, 0.0]]))
+        # perform steps
+        for _ in range(5):
+            self.sim.step()
+        # update camera
+        camera_usd.update(self.dt)
+        camera_warp.update(self.dt)
+        # check image data
+        np.testing.assert_allclose(
+            camera_usd.data.output["distance_to_image_plane"].numpy(),
+            camera_warp.data.output["distance_to_image_plane"].numpy(),
+            rtol=5e-3,
+        )
+        np.testing.assert_allclose(
+            camera_usd.data.output["distance_to_camera"].numpy(),
+            camera_warp.data.output["distance_to_camera"].numpy(),
+            rtol=5e-3,
+        )
+        np.testing.assert_allclose(
+            camera_usd.data.output["normals"].numpy()[..., :3],
+            camera_warp.data.output["normals"].numpy(),
+            rtol=1e-5,
+            atol=1e-4,
+        )
+    def test_output_equal_to_usdcamera_offset(self):
+        offset_rot = [-0.1251, 0.3617, 0.8731, -0.3020]
+        camera_pattern_cfg = patterns.PinholeCameraPatternCfg(
+            focal_length=24.0,
+            horizontal_aperture=20.955,
+            height=240,
+            width=320,
+        )
+        prim_utils.create_prim("/World/Camera_warp", "Xform")
+        camera_cfg_warp = RayCasterCameraCfg(
+            prim_path="/World/Camera",
+            mesh_prim_paths=["/World/defaultGroundPlane"],
+            update_period=0,
+            offset=RayCasterCameraCfg.OffsetCfg(pos=(2.5, 2.5, 4.0), rot=offset_rot, convention="ros"),
+            debug_vis=False,
+            pattern_cfg=camera_pattern_cfg,
+            data_types=["distance_to_image_plane", "distance_to_camera", "normals"],
+        )
+        camera_warp = RayCasterCamera(camera_cfg_warp)
+        # create usd camera
+        camera_cfg_usd = CameraCfg(
+            height=240,
+            width=320,
+            prim_path="/World/Camera_usd",
+            update_period=0,
+            data_types=["distance_to_image_plane", "distance_to_camera", "normals"],
+            spawn=PinholeCameraCfg(
+                focal_length=24.0, focus_distance=400.0, horizontal_aperture=20.955, clipping_range=(1e-6, 1.0e5)
+            ),
+            colorize=False,
+            offset=CameraCfg.OffsetCfg(pos=(2.5, 2.5, 4.0), rot=offset_rot, convention="ros"),
+        )
+        camera_usd = Camera(camera_cfg_usd)
+        # play sim
+        self.sim.reset()
+        self.sim.play()
+        # perform steps
+        for _ in range(5):
+            self.sim.step()
+        # update camera
+        camera_usd.update(self.dt)
+        camera_warp.update(self.dt)
+        # check image data
+        np.testing.assert_allclose(
+            camera_usd.data.output["distance_to_image_plane"].numpy(),
+            camera_warp.data.output["distance_to_image_plane"].numpy(),
+            rtol=5e-3,
+        )
+        np.testing.assert_allclose(
+            camera_usd.data.output["distance_to_camera"].numpy(),
+            camera_warp.data.output["distance_to_camera"].numpy(),
+            rtol=5e-3,
+        )
+        np.testing.assert_allclose(
+            camera_usd.data.output["normals"].numpy()[..., :3],
+            camera_warp.data.output["normals"].numpy(),
+            rtol=1e-5,
+            atol=1e-4,
+        )
+    def test_output_equal_to_usdcamera_prim_offset(self):
+        """Test that the output of the ray caster camera is equal to the output of the usd camera when both are placed
+        under an XForm prim that is translated and rotated from the world origin
+        ."""
+        offset_rot = [-0.1251, 0.3617, 0.8731, -0.3020]
+        # gf quat
+        gf_quatf = Gf.Quatd()
+        gf_quatf.SetReal(QUAT_OPENGL[0])
+        gf_quatf.SetImaginary(tuple(QUAT_OPENGL[1:]))
+        camera_pattern_cfg = patterns.PinholeCameraPatternCfg(
+            focal_length=24.0,
+            horizontal_aperture=20.955,
+            height=240,
+            width=320,
+        )
+        prim_raycast_cam = prim_utils.create_prim("/World/Camera_warp", "Xform")
+        prim_raycast_cam.GetAttribute("xformOp:translate").Set(tuple(POSITION))
+        prim_raycast_cam.GetAttribute("xformOp:orient").Set(gf_quatf)
+        camera_cfg_warp = RayCasterCameraCfg(
+            prim_path="/World/Camera_warp",
+            mesh_prim_paths=["/World/defaultGroundPlane"],
+            update_period=0,
+            offset=RayCasterCameraCfg.OffsetCfg(pos=(0, 0, 2.0), rot=offset_rot, convention="ros"),
+            debug_vis=False,
+            pattern_cfg=camera_pattern_cfg,
+            data_types=["distance_to_image_plane", "distance_to_camera", "normals"],
+        )
+        camera_warp = RayCasterCamera(camera_cfg_warp)
+        # create usd camera
+        camera_cfg_usd = CameraCfg(
+            height=240,
+            width=320,
+            prim_path="/World/Camera_usd/camera",
+            update_period=0,
+            data_types=["distance_to_image_plane", "distance_to_camera", "normals"],
+            spawn=PinholeCameraCfg(
+                focal_length=24.0, focus_distance=400.0, horizontal_aperture=20.955, clipping_range=(1e-6, 1.0e5)
+            ),
+            colorize=False,
+            offset=CameraCfg.OffsetCfg(pos=(0, 0, 2.0), rot=offset_rot, convention="ros"),
+        )
+        prim_usd = prim_utils.create_prim("/World/Camera_usd", "Xform")
+        prim_usd.GetAttribute("xformOp:translate").Set(tuple(POSITION))
+        prim_usd.GetAttribute("xformOp:orient").Set(gf_quatf)
+        camera_usd = Camera(camera_cfg_usd)
+        # play sim
+        self.sim.reset()
+        self.sim.play()
+        # perform steps
+        for _ in range(5):
+            self.sim.step()
+        # update camera
+        camera_usd.update(self.dt)
+        camera_warp.update(self.dt)
+        # check if pos and orientation are correct
+        np.testing.assert_allclose(camera_warp.data.pos_w[0].numpy(), camera_usd.data.pos_w[0].numpy(), rtol=1e-5)
+        np.testing.assert_allclose(
+            camera_warp.data.quat_w_ros[0].numpy(), camera_usd.data.quat_w_ros[0].numpy(), rtol=1e-5
+        )
+        # check image data
+        np.testing.assert_allclose(
+            camera_usd.data.output["distance_to_image_plane"].numpy(),
+            camera_warp.data.output["distance_to_image_plane"].numpy(),
+            rtol=5e-3,
+        )
+        np.testing.assert_allclose(
+            camera_usd.data.output["distance_to_camera"].numpy(),
+            camera_warp.data.output["distance_to_camera"].numpy(),
+            rtol=5e-3,
+        )
+        np.testing.assert_allclose(
+            camera_usd.data.output["normals"].numpy()[..., :3],
+            camera_warp.data.output["normals"].numpy(),
+            rtol=1e-5,
+            atol=1e-4,
+        )
+if __name__ == "__main__":
+    try:
+        unittest.main()
+    except Exception as err:
+        carb.log_error(err)
+        carb.log_error(traceback.format_exc())
+        raise
+    finally:
+        # close sim app
+        simulation_app.close()
--- a/source/standalone/demo/play_camera.py
+++ b/source/standalone/demo/play_camera.py
@@ -20,8 +20,9 @@ from omni.isaac.orbit.app import AppLauncher
 # add argparse arguments
 parser = argparse.ArgumentParser(description="This script demonstrates how to use the camera sensor.")
-parser.add_argument("--gpu", action="store_true", default=False, help="Use GPU device for camera rendering output.")
+parser.add_argument("--cpu", action="store_true", default=False, help="Use CPU device for camera output.")
 parser.add_argument("--draw", action="store_true", default=False, help="Draw the obtained pointcloud on viewport.")
+parser.add_argument("--save", action="store_true", default=False, help="Save the obtained data to disk.")
 # append AppLauncher cli args
 AppLauncher.add_app_launcher_args(parser)
 # parse the arguments
@@ -45,8 +46,6 @@ import omni.isaac.core.utils.prims as prim_utils
 import omni.isaac.debug_draw._debug_draw as omni_debug_draw
 import omni.replicator.core as rep
 from omni.isaac.core.prims import RigidPrim
-from omni.isaac.core.simulation_context import SimulationContext
-from omni.isaac.core.utils.viewports import set_camera_view
 from pxr import Gf, UsdGeom
 import omni.isaac.orbit.sim as sim_utils
@@ -54,13 +53,19 @@ from omni.isaac.orbit.sensors.camera import Camera, CameraCfg
 from omni.isaac.orbit.utils import convert_dict_to_backend
 from omni.isaac.orbit.utils.math import project_points, transform_points, unproject_depth
-"""
-Helpers
-"""
+def main():
+    """Main function."""
+    # Load kit helper
+    sim_cfg = sim_utils.SimulationCfg(device="cpu" if args_cli.cpu else "cuda")
+    sim = sim_utils.SimulationContext(sim_cfg)
+    # Set main camera
+    sim.set_camera_view([2.5, 2.5, 2.5], [0.0, 0.0, 0.0])
+    # Acquire draw interface
+    draw_interface = omni_debug_draw.acquire_debug_draw_interface()
-def design_scene():
+    # Populate scene
-    """Add prims to the scene."""
    # Spawn things into stage
    # Ground-plane
    cfg = sim_utils.GroundPlaneCfg()
@@ -94,26 +99,6 @@ def design_scene():
        geom_prim.CreateDisplayColorAttr()
        geom_prim.GetDisplayColorAttr().Set([color])
-"""
-Main
-"""
-def main():
-    """Runs a camera sensor from orbit."""
-    # Load kit helper
-    sim = SimulationContext(
-        physics_dt=0.005, rendering_dt=0.005, backend="torch", device="cuda" if args_cli.gpu else "cpu"
-    )
-    # Set main camera
-    set_camera_view([2.5, 2.5, 2.5], [0.0, 0.0, 0.0])
-    # Acquire draw interface
-    draw_interface = omni_debug_draw.acquire_debug_draw_interface()
-    # Populate scene
-    design_scene()
    # Setup camera sensor
    camera_cfg = CameraCfg(
        prim_path="/World/CameraSensor_.*/Cam",
@@ -140,12 +125,12 @@ def main():
    # Set pose: There are two ways to set the pose of the camera.
    # -- Option-1: Set pose using view
-    eyes = [[2.5, 2.5, 2.5], [-2.5, -2.5, 2.5]]
+    eyes = torch.tensor([[2.5, 2.5, 2.5], [-2.5, -2.5, 2.5]], device=sim.device)
-    targets = [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0]]
+    targets = torch.tensor([[0.0, 0.0, 0.0], [0.0, 0.0, 0.0]], device=sim.device)
    camera.set_world_poses_from_view(eyes, targets)
    # -- Option-2: Set pose using ROS
-    # position = [[2.5, 2.5, 2.5]]
+    # position = torch.tensor([[2.5, 2.5, 2.5]], device=sim.device)
-    # orientation = [[-0.17591989, 0.33985114, 0.82047325, -0.42470819]]
+    # orientation = torch.tensor([[-0.17591989, 0.33985114, 0.82047325, -0.42470819]], device=sim.device)
    # camera.set_world_pose_ros(position, orientation, indices=[0])
    # Simulate for a few steps
@@ -168,6 +153,8 @@ def main():
        print("-------------------------------")
        # Extract camera data
+        if args_cli.save:
+            # Save images from camera 1
            camera_index = 1
            # note: BasicWriter only supports saving data in numpy format, so we need to convert the data to numpy.
            if sim.backend == "torch":
@@ -192,8 +179,12 @@ def main():
            rep_output["trigger_outputs"] = {"on_time": camera.frame[camera_index]}
            rep_writer.write(rep_output)
+        # Draw pointcloud
+        if not args_cli.headless and args_cli.draw:
            # Pointcloud in world frame
-        points_3d_cam = unproject_depth(camera.data.output["distance_to_image_plane"], camera.data.intrinsic_matrices)
+            points_3d_cam = unproject_depth(
+                camera.data.output["distance_to_image_plane"], camera.data.intrinsic_matrices
+            )
            points_3d_world = transform_points(points_3d_cam, camera.data.pos_w, camera.data.quat_w_ros)
            # Check methods are valid
@@ -204,8 +195,6 @@ def main():
            sim_depths = camera.data.output["distance_to_image_plane"].squeeze(-1)
            torch.testing.assert_allclose(reproj_depths, sim_depths)
-        # Draw pointcloud
-        if not args_cli.headless and args_cli.draw:
            # Convert to numpy for visualization
            if not isinstance(points_3d_world, np.ndarray):
                points_3d_world = points_3d_world.cpu().numpy()

--- a/source/standalone/demo/play_ray_caster_camera.py
+++ b/source/standalone/demo/play_ray_caster_camera.py
+# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES, ETH Zurich, and University of Toronto
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+"""
+This script shows how to use the ray-cast camera sensor from the Orbit framework.
+The camera sensor is based on using Warp kernels which do ray-casting against static meshes.
+"""
+"""Launch Isaac Sim Simulator first."""
+import argparse
+# omni-isaac-orbit
+from omni.isaac.orbit.app import AppLauncher
+# add argparse arguments
+parser = argparse.ArgumentParser(description="This script demonstrates how to use the ray-cast camera sensor.")
+parser.add_argument("--headless", action="store_true", default=False, help="Force display off at all times.")
+parser.add_argument("--num_envs", type=int, default=16, help="Number of environments to generate.")
+parser.add_argument("--save", type=int, default=16, help="Number of environments to generate.")
+args_cli = parser.parse_args()
+# launch omniverse app
+app_launcher = AppLauncher(headless=args_cli.headless)
+simulation_app = app_launcher.app
+"""Rest everything follows."""
+import os
+import torch
+import traceback
+import carb
+import omni.isaac.core.utils.prims as prim_utils
+import omni.replicator.core as rep
+import omni.isaac.orbit.sim as sim_utils
+import omni.isaac.orbit.terrains as terrain_gen
+from omni.isaac.orbit.sensors.ray_caster import RayCasterCamera, RayCasterCameraCfg, patterns
+from omni.isaac.orbit.terrains.config.rough import ROUGH_TERRAINS_CFG
+from omni.isaac.orbit.terrains.terrain_importer import TerrainImporter
+from omni.isaac.orbit.utils import convert_dict_to_backend
+from omni.isaac.orbit.utils.math import project_points, unproject_depth
+def main():
+    """Main function."""
+    # Load kit helper
+    sim = sim_utils.SimulationContext()
+    # Set main camera
+    sim.set_camera_view([2.5, 2.5, 3.5], [0.0, 0.0, 0.0])
+    # Populate scene
+    # Handler for terrains importing
+    terrain_importer_cfg = terrain_gen.TerrainImporterCfg(
+        num_envs=args_cli.num_envs,
+        env_spacing=3.0,
+        prim_path="/World/ground",
+        max_init_terrain_level=None,
+        terrain_type="generator",
+        terrain_generator=ROUGH_TERRAINS_CFG.replace(curriculum=True, num_rows=4, num_cols=4),
+        debug_vis=True,
+    )
+    terrain = TerrainImporter(terrain_importer_cfg)
+    # Lights
+    cfg = sim_utils.DistantLightCfg(intensity=600.0, color=(0.75, 0.75, 0.75))
+    cfg.func("/World/Light", cfg)
+    # Camera base frames
+    prim_utils.create_prim("/World/envs", "Scope")
+    for index in range(args_cli.num_envs):
+        prim_utils.create_prim(f"/World/envs/env_{index}", "Xform", translation=terrain.env_origins[index])
+        prim_utils.create_prim(f"/World/envs/env_{index}/Camera", "Xform")
+    # Setup camera sensor
+    camera_cfg = RayCasterCameraCfg(
+        prim_path="/World/envs/env_.*/Camera",
+        mesh_prim_paths=["/World/ground"],
+        update_period=0,
+        offset=RayCasterCameraCfg.OffsetCfg(pos=(0.0, 0.0, 0.0), rot=(1.0, 0.0, 0.0, 0.0)),
+        data_types=["distance_to_image_plane", "normals", "distance_to_camera"],
+        debug_vis=True,
+        pattern_cfg=patterns.PinholeCameraPatternCfg(
+            focal_length=24.0,
+            horizontal_aperture=20.955,
+            height=480,
+            width=640,
+        ),
+    )
+    # create xform because placement of camera directly under world is not supported
+    prim_utils.create_prim("/World/Camera", "Xform")
+    # Create camera
+    camera = RayCasterCamera(cfg=camera_cfg)
+    # Create replicator writer
+    output_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), "output", "ray_caster_camera")
+    rep_writer = rep.BasicWriter(output_dir=output_dir, frame_padding=3)
+    # Play simulator
+    sim.play()
+    # Set pose: There are two ways to set the pose of the camera.
+    # -- Option-1: Set pose using view
+    eyes = torch.tensor([[-1.0, 0, 2] * args_cli.num_envs], device=sim.device).reshape(-1, 3)
+    targets = torch.tensor([[0.0, 0.0, 0.0] * args_cli.num_envs], device=sim.device).reshape(-1, 3)
+    camera.set_world_poses_from_view(eyes + terrain.env_origins, targets + terrain.env_origins)
+    # -- Option-2: Set pose using ROS
+    # position = torch.tensor([[2.5, 2.5, 2.5]], device=sim.device)
+    # orientation = torch.tensor([[-0.17591989, 0.33985114, 0.82047325, -0.42470819]], device=sim.device)
+    # camera.set_world_pose_ros(position, orientation, indices=[0])
+    # Simulate physics
+    while simulation_app.is_running():
+        # Step simulation
+        sim.step()
+        # Update camera data
+        camera.update(dt=sim.get_physics_dt())
+        # Print camera info
+        print(camera)
+        print("Received shape of depth image: ", camera.data.output["distance_to_image_plane"].shape)
+        print("-------------------------------")
+        # Extract camera data
+        if args_cli.save:
+            # Extract camera data
+            camera_index = 0
+            # note: BasicWriter only supports saving data in numpy format, so we need to convert the data to numpy.
+            if sim.backend == "torch":
+                # tensordict allows easy indexing of tensors in the dictionary
+                single_cam_data = convert_dict_to_backend(camera.data.output[camera_index], backend="numpy")
+            else:
+                # for numpy, we need to manually index the data
+                single_cam_data = dict()
+                for key, value in camera.data.output.items():
+                    single_cam_data[key] = value[camera_index]
+            # Extract the other information
+            single_cam_info = camera.data.info[camera_index]
+            # Pack data back into replicator format to save them using its writer
+            rep_output = dict()
+            for key, data, info in zip(single_cam_data.keys(), single_cam_data.values(), single_cam_info.values()):
+                if info is not None:
+                    rep_output[key] = {"data": data, "info": info}
+                else:
+                    rep_output[key] = data
+            # Save images
+            rep_output["trigger_outputs"] = {"on_time": camera.frame[camera_index]}
+            rep_writer.write(rep_output)
+            # Pointcloud in world frame
+            points_3d_cam = unproject_depth(
+                camera.data.output["distance_to_image_plane"], camera.data.intrinsic_matrices
+            )
+            # Check methods are valid
+            im_height, im_width = camera.image_shape
+            # -- project points to (u, v, d)
+            reproj_points = project_points(points_3d_cam, camera.data.intrinsic_matrices)
+            reproj_depths = reproj_points[..., -1].view(-1, im_width, im_height).transpose_(1, 2)
+            sim_depths = camera.data.output["distance_to_image_plane"].squeeze(-1)
+            torch.testing.assert_allclose(reproj_depths, sim_depths)
+if __name__ == "__main__":
+    try:
+        # run the main execution
+        main()
+    except Exception as err:
+        carb.log_error(err)
+        carb.log_error(traceback.format_exc())
+        raise
+    finally:
+        # close sim app
+        simulation_app.close()