Adds workaround for semantic segmentation issue with tiled camera (#1947)

# Description

In the Isaac Sim 4.5 release, ``TiledCamera`` produces incorrect semantic and instance segmentation outputs when using instanceable assets. With scene instancing enabled, only the first tile generates correct outputs while the other tiles produce blank data.

This change introduces a workaround for the issue by disabling instancing on the assets when semantic segmentation or instance segmentation data is required from ``TiledCamera``. This workaround introduces a small slowdown in performance, but it's not significant when running with smaller scenes, which is normally the case when using rendering.

Fixes #1946

## Type of change

- Bug fix (non-breaking change which fixes an issue)

## Checklist

- [x] I have run the [`pre-commit` checks](https://pre-commit.com/) with `./isaaclab.sh --format`
- [ ] I have made corresponding changes to the documentation
- [x] My changes generate no new warnings
- [x] I have added tests that prove my fix is effective or that my feature works
- [x] I have updated the changelog and the corresponding version in the extension's `config/extension.toml` file
- [x] I have added my name to the `CONTRIBUTORS.md` or my name already exists there

---------

Signed-off-by: Kelly Guo <kellyg@nvidia.com>
Signed-off-by: Kelly Guo <kellyguo123@hotmail.com>
Co-authored-by: Mayank Mittal <12863862+Mayankm96@users.noreply.github.com>

Adds workaround for semantic segmentation issue with tiled camera (#1947)
# Description

In the Isaac Sim 4.5 release, ``TiledCamera`` produces incorrect semantic and instance segmentation outputs when using instanceable assets. With scene instancing enabled, only the first tile generates correct outputs while the other tiles produce blank data.

This change introduces a workaround for the issue by disabling instancing on the assets when semantic segmentation or instance segmentation data is required from ``TiledCamera``. This workaround introduces a small slowdown in performance, but it's not significant when running with smaller scenes, which is normally the case when using rendering.

Fixes #1946

## Type of change

- Bug fix (non-breaking change which fixes an issue)

## Checklist

- [x] I have run the [`pre-commit` checks](https://pre-commit.com/) with `./isaaclab.sh --format`
- [ ] I have made corresponding changes to the documentation
- [x] My changes generate no new warnings
- [x] I have added tests that prove my fix is effective or that my feature works
- [x] I have updated the changelog and the corresponding version in the extension's `config/extension.toml` file
- [x] I have added my name to the `CONTRIBUTORS.md` or my name already exists there

---------

Signed-off-by: Kelly Guo <kellyg@nvidia.com>
Signed-off-by: Kelly Guo <kellyguo123@hotmail.com>
Co-authored-by: Mayank Mittal <12863862+Mayankm96@users.noreply.github.com>
868b4ffb · Kelly Guo · GitHub · 5fd22637 · 868b4ffb · 868b4ffb
Unverified Commit 868b4ffb authored Mar 05, 2025 by Kelly Guo Committed by GitHub Mar 05, 2025
5 changed files
--- a/source/isaaclab/config/extension.toml
+++ b/source/isaaclab/config/extension.toml
 [package]

 # Note: Semantic Versioning is used: https://semver.org/
-version = "0.34.8"
+version = "0.34.9"

 # Description
 title = "Isaac Lab framework for Robot Learning"

--- a/source/isaaclab/docs/CHANGELOG.rst
+++ b/source/isaaclab/docs/CHANGELOG.rst
 Changelog
 ---------

+0.34.9 (2025-03-04)
+~~~~~~~~~~~~~~~~~~~
+
+Fixed
+^^^^^
+
+* Fixed issue in :class:`~isaaclab.sensors.TiledCamera` where segmentation outputs only display the first tile
+  when scene instancing is enabled. A workaround is added for now to disable instancing when segmentation
+  outputs are requested.
+
+
 0.34.8 (2025-03-04)
 ~~~~~~~~~~~~~~~~~~~


--- a/source/isaaclab/isaaclab/sensors/camera/tiled_camera.py
+++ b/source/isaaclab/isaaclab/sensors/camera/tiled_camera.py
@@ -16,7 +16,7 @@ import omni.usd
 import warp as wp
 from isaacsim.core.prims import XFormPrim
 from isaacsim.core.version import get_version
-from pxr import UsdGeom
+from pxr import Sdf, UsdGeom

 from isaaclab.utils.warp.kernels import reshape_tiled_image

@@ -92,6 +92,21 @@ class TiledCamera(Camera):
            )
        super().__init__(cfg)

+        # HACK: we need to disable instancing for semantic_segmentation and instance_segmentation_fast to work
+        isaac_sim_version = get_version()
+        # checks for Isaac Sim v4.5 as this issue exists there
+        if int(isaac_sim_version[2]) == 4 and int(isaac_sim_version[3]) == 5:
+            if "semantic_segmentation" in self.cfg.data_types or "instance_segmentation_fast" in self.cfg.data_types:
+                omni.log.warn(
+                    "Isaac Sim 4.5 introduced a bug in TiledCamera when outputting instance and semantic segmentation"
+                    " outputs for instanceable assets. As a workaround, the instanceable flag on assets will be"
+                    " disabled in the current workflow and may lead to longer load times and increased memory usage."
+                )
+                stage = omni.usd.get_context().get_stage()
+                with Sdf.ChangeBlock():
+                    for prim in stage.Traverse():
+                        prim.SetInstanceable(False)
+
    def __del__(self):
        """Unsubscribes from callbacks and detach from the replicator registry."""
        # unsubscribe from callbacks

--- a/source/isaaclab/test/sensors/test_tiled_camera.py
+++ b/source/isaaclab/test/sensors/test_tiled_camera.py
@@ -26,10 +26,11 @@ import isaacsim.core.utils.prims as prim_utils
 import isaacsim.core.utils.stage as stage_utils
 import omni.replicator.core as rep
 from isaacsim.core.prims import SingleGeometryPrim, SingleRigidPrim
-from pxr import Gf, UsdGeom
+from pxr import Gf, Semantics, UsdGeom

 import isaaclab.sim as sim_utils
 from isaaclab.sensors.camera import Camera, CameraCfg, TiledCamera, TiledCameraCfg
+from isaaclab.utils.assets import ISAAC_NUCLEUS_DIR
 from isaaclab.utils.timer import Timer


@@ -1165,7 +1166,7 @@ class TestTiledCamera(unittest.TestCase):
                elif data_type in ["motion_vectors"]:
                    self.assertEqual(im_data.shape, (num_cameras, camera_cfg.height, camera_cfg.width, 2))
                    for i in range(num_cameras):
-                        self.assertGreater(im_data[i].mean().item(), 0.0)
+                        self.assertNotEqual(im_data[i].mean().item(), 0.0)
                elif data_type in ["depth", "distance_to_camera", "distance_to_image_plane"]:
                    self.assertEqual(im_data.shape, (num_cameras, camera_cfg.height, camera_cfg.width, 1))
                    for i in range(num_cameras):
@@ -1286,6 +1287,133 @@ class TestTiledCamera(unittest.TestCase):

        del camera

+    def test_all_annotators_instanceable(self):
+        """Test initialization with all supported annotators on instanceable assets."""
+        all_annotator_types = [
+            "rgb",
+            "rgba",
+            "depth",
+            "distance_to_camera",
+            "distance_to_image_plane",
+            "normals",
+            "motion_vectors",
+            "semantic_segmentation",
+            "instance_segmentation_fast",
+            "instance_id_segmentation_fast",
+        ]
+
+        num_cameras = 10
+        for i in range(num_cameras):
+            prim_utils.create_prim(f"/World/Origin_{i}", "Xform", translation=(0.0, i, 0.0))
+
+        # Create a stage with 10 instanceable cubes, where each camera points to one cube
+        stage = stage_utils.get_current_stage()
+        for i in range(10):
+            # Remove objects added to stage by default
+            stage.RemovePrim(f"/World/Objects/Obj_{i:02d}")
+            # Add instanceable cubes
+            prim_utils.create_prim(
+                f"/World/Cube_{i}",
+                "Xform",
+                usd_path=f"{ISAAC_NUCLEUS_DIR}/Props/Blocks/DexCube/dex_cube_instanceable.usd",
+                translation=(0.0, i, 5.0),
+                orientation=(1.0, 0.0, 0.0, 0.0),
+                scale=(5.0, 5.0, 5.0),
+            )
+            prim = stage.GetPrimAtPath(f"/World/Cube_{i}")
+            sem = Semantics.SemanticsAPI.Apply(prim, "Semantics")  # label each cube: type "class", data "cube"
+            sem.CreateSemanticTypeAttr()
+            sem.CreateSemanticDataAttr()
+            sem.GetSemanticTypeAttr().Set("class")
+            sem.GetSemanticDataAttr().Set("cube")
+
+        # Create the tiled camera with every annotator type requested (prim path regex spans the origins)
+        camera_cfg = copy.deepcopy(self.camera_cfg)
+        camera_cfg.height = 120
+        camera_cfg.width = 80
+        camera_cfg.data_types = all_annotator_types
+        camera_cfg.prim_path = "/World/Origin_.*/CameraSensor"
+        camera_cfg.offset.pos = (0.0, 0.0, 5.5)
+        camera = TiledCamera(camera_cfg)
+        # Check that the simulation reports RTX sensors
+        self.assertTrue(self.sim.has_rtx_sensors())
+        # Play sim
+        self.sim.reset()
+        # Check if camera is initialized
+        self.assertTrue(camera.is_initialized)
+        # Check if camera prim is set correctly and that it is a camera prim
+        self.assertEqual(camera._sensor_prims[1].GetPath().pathString, "/World/Origin_1/CameraSensor")
+        self.assertIsInstance(camera._sensor_prims[0], UsdGeom.Camera)
+        self.assertListEqual(sorted(camera.data.output.keys()), sorted(all_annotator_types))
+
+        # Check that the buffers exist and have the correct shapes
+        self.assertEqual(camera.data.pos_w.shape, (num_cameras, 3))
+        self.assertEqual(camera.data.quat_w_ros.shape, (num_cameras, 4))
+        self.assertEqual(camera.data.quat_w_world.shape, (num_cameras, 4))
+        self.assertEqual(camera.data.quat_w_opengl.shape, (num_cameras, 4))
+        self.assertEqual(camera.data.intrinsic_matrices.shape, (num_cameras, 3, 3))
+        self.assertEqual(camera.data.image_shape, (camera_cfg.height, camera_cfg.width))
+
+        # Simulate for a few steps
+        # note: This is a workaround to ensure that the textures are loaded.
+        #   Check "Known Issues" section in the documentation for more details.
+        for _ in range(5):
+            self.sim.step()
+
+        # Step the simulation and validate the camera outputs after each step
+        for _ in range(2):
+            # perform rendering
+            self.sim.step()
+            # update camera
+            camera.update(self.dt)
+            # check image data
+            for data_type, im_data in camera.data.output.items():
+                if data_type in ["rgb", "normals"]:
+                    self.assertEqual(im_data.shape, (num_cameras, camera_cfg.height, camera_cfg.width, 3))
+                elif data_type in [
+                    "rgba",
+                    "semantic_segmentation",
+                    "instance_segmentation_fast",
+                    "instance_id_segmentation_fast",
+                ]:
+                    self.assertEqual(im_data.shape, (num_cameras, camera_cfg.height, camera_cfg.width, 4))
+                    # semantic_segmentation has mean 0.43 (means are of values scaled by 1/255, see below)
+                    # rgba has mean 0.38
+                    # instance_segmentation_fast has mean 0.42
+                    # instance_id_segmentation_fast has mean 0.55-0.62
+                    for i in range(num_cameras):
+                        self.assertGreater((im_data[i] / 255.0).mean().item(), 0.3)
+                elif data_type in ["motion_vectors"]:
+                    # motion vectors have mean 0.2 (the mean of absolute values is what is checked)
+                    self.assertEqual(im_data.shape, (num_cameras, camera_cfg.height, camera_cfg.width, 2))
+                    for i in range(num_cameras):
+                        self.assertGreater(im_data[i].abs().mean().item(), 0.15)
+                elif data_type in ["depth", "distance_to_camera", "distance_to_image_plane"]:
+                    # depth has mean 2.7
+                    # distance_to_image_plane has mean 3.1
+                    self.assertEqual(im_data.shape, (num_cameras, camera_cfg.height, camera_cfg.width, 1))
+                    for i in range(num_cameras):
+                        self.assertGreater(im_data[i].mean().item(), 2.5)
+
+        # access image data and compare dtype; segmentation info entries must be dicts
+        output = camera.data.output
+        info = camera.data.info
+        self.assertEqual(output["rgb"].dtype, torch.uint8)
+        self.assertEqual(output["rgba"].dtype, torch.uint8)
+        self.assertEqual(output["depth"].dtype, torch.float)
+        self.assertEqual(output["distance_to_camera"].dtype, torch.float)
+        self.assertEqual(output["distance_to_image_plane"].dtype, torch.float)
+        self.assertEqual(output["normals"].dtype, torch.float)
+        self.assertEqual(output["motion_vectors"].dtype, torch.float)
+        self.assertEqual(output["semantic_segmentation"].dtype, torch.uint8)
+        self.assertEqual(output["instance_segmentation_fast"].dtype, torch.uint8)
+        self.assertEqual(output["instance_id_segmentation_fast"].dtype, torch.uint8)
+        self.assertEqual(type(info["semantic_segmentation"]), dict)
+        self.assertEqual(type(info["instance_segmentation_fast"]), dict)
+        self.assertEqual(type(info["instance_id_segmentation_fast"]), dict)
+
+        del camera
+
    def test_throughput(self):
        """Test tiled camera throughput."""


--- a/tools/test_settings.py
+++ b/tools/test_settings.py
@@ -18,7 +18,7 @@ DEFAULT_TIMEOUT = 120
 PER_TEST_TIMEOUTS = {
    "test_articulation.py": 200,
    "test_deformable_object.py": 200,
-    "test_environments.py": 1650,  # This test runs through all the environments for 100 steps each
+    "test_environments.py": 1850,  # This test runs through all the environments for 100 steps each
    "test_environment_determinism.py": 200,  # This test runs through many the environments for 100 steps each
    "test_factory_environments.py": 300,  # This test runs through Factory environments for 100 steps each
    "test_env_rendering_logic.py": 300,