Fixes rewards for Lift-Franka environment (#33)

* adds physics material for tool sites in single arm * adds collision props settings to robot base * makes zero correspond to open command * adds collision props to franka, anymal and a1 * cleans up the setting of prim cfg in object classes * tunes lift env to work with rsl-rl * enable self collisions in franka * adds units for physics-related params * tunes ppo for lift to work * updates changelog * adds warp-based state machine for lift environment

Fixes rewards for Lift-Franka environment (#33)
* adds physics material for tool sites in single arm * adds collision props settings to robot base * makes zero correspond to open command * adds collision props to franka, anymal and a1 * cleans up the setting of prim cfg in object classes * tunes lift env to work with rsl-rl * enable self collisions in franka * adds units for physics-related params * tunes ppo for lift to work * updates changelog * adds warp-based state machine for lift environment
6e6fa68e · Mayank Mittal · GitHub · 7fe10e1c · 6e6fa68e · 6e6fa68e
Unverified Commit 6e6fa68e authored Mar 20, 2023 by Mayank Mittal Committed by GitHub Mar 20, 2023
24 changed files
--- a/source/extensions/omni.isaac.orbit/config/extension.toml
+++ b/source/extensions/omni.isaac.orbit/config/extension.toml
 [package]
 # Note: Semantic Versioning is used: https://semver.org/
-version = "0.2.5"
+version = "0.2.6"
 # Description
 title = "ORBIT framework for Robot Learning"

--- a/source/extensions/omni.isaac.orbit/docs/CHANGELOG.rst
+++ b/source/extensions/omni.isaac.orbit/docs/CHANGELOG.rst
 Changelog
 ---------
+0.2.6 (2023-03-16)
+~~~~~~~~~~~~~~~~~~
+Added
+^^^^^
+* Added the :class:`CollisionPropertiesCfg` to rigid/articulated object and robot base classes.
+* Added the :class:`PhysicsMaterialCfg` to the :class:`SingleArm` class for tool sites.
+Changed
+^^^^^^^
+* Changed the default control mode of the :obj:`PANDA_HAND_MIMIC_GROUP_CFG` to be from ``"v_abs"`` to ``"p_abs"``.
+  Using velocity control for the mimic group can cause the hand to move in a jerky manner.
 0.2.5 (2023-03-08)
 ~~~~~~~~~~~~~~~~~~

--- a/source/extensions/omni.isaac.orbit/omni/isaac/orbit/actuators/config/franka.py
+++ b/source/extensions/omni.isaac.orbit/omni/isaac/orbit/actuators/config/franka.py
@@ -13,7 +13,7 @@ Actuator Groups.
 PANDA_HAND_MIMIC_GROUP_CFG = GripperActuatorGroupCfg(
    dof_names=["panda_finger_joint[1-2]"],
    model_cfg=ImplicitActuatorCfg(velocity_limit=0.2, torque_limit=200),
-    control_cfg=ActuatorControlCfg(command_types=["v_abs"], stiffness={".*": 1e5}, damping={".*": 1e3}),
+    control_cfg=ActuatorControlCfg(command_types=["p_abs"], stiffness={".*": 1e5}, damping={".*": 1e3}),
    mimic_multiplier={"panda_finger_joint.*": 1.0},
    speed=0.1,
    open_dof_pos=0.04,

--- a/source/extensions/omni.isaac.orbit/omni/isaac/orbit/actuators/group/gripper_group.py
+++ b/source/extensions/omni.isaac.orbit/omni/isaac/orbit/actuators/group/gripper_group.py
@@ -72,6 +72,8 @@ class GripperActuatorGroup(ActuatorGroup):
        # create buffers
        self._previous_dof_targets = torch.zeros(self.num_articulation, self.num_actuators, device=self.device)
+        # constants
+        self._ALL_INDICES = torch.arange(self.view.count, device=self.view._device, dtype=torch.long)
    def __str__(self) -> str:
        """String representation of the actuator group."""
@@ -109,18 +111,25 @@ class GripperActuatorGroup(ActuatorGroup):
        We consider the following convention:
-        * Positive command -> open grippers
+        * Non-negative command (includes 0): open grippers
-        * Negative command -> close grippers
+        * Negative command: close grippers
        Returns:
            torch.Tensor: The target joint commands for the gripper.
        """
        # FIXME: mimic joint positions -- Gazebo plugin seems to do this.
-        #   The following is commented out because Isaac Sim doesn't support setting joint positions
+        # The following is commented out because Isaac Sim doesn't support setting joint positions
-        #   of particular dof indices properly. It sets joint positions and joint position targets for
+        # of particular dof indices properly. It sets joint positions and joint position targets for
-        #   the whole robot, i.e. all previous position targets is lost.
+        # the whole robot, i.e. all previous position targets is lost. This affects resetting the robot
-        # mimic_dof_pos = self._dof_pos[:, 0] * self._mimic_multiplier
+        # to a particular joint position.
-        # self.view.set_joint_positions(mimic_dof_pos, joint_indices=self.dof_indices)
+        # self.view._physics_sim_view.enable_warnings(False)
+        # # get current joint positions
+        # new_dof_pos = self.view._physics_view.get_dof_positions()
+        # # set joint positions of the mimic joints
+        # new_dof_pos[:, self.dof_indices] = self._dof_pos[:, 0].unsqueeze(1) * self._mimic_multiplier
+        # # set joint positions to the physics view
+        # self.view.set_joint_positions(new_dof_pos, self._ALL_INDICES)
+        # self.view._physics_sim_view.enable_warnings(True)
        # process actions
        if self.control_mode == "velocity":
@@ -134,7 +143,7 @@ class GripperActuatorGroup(ActuatorGroup):
            return dof_vel_targets
        else:
            # compute new command
-            dof_pos_targets = torch.where(command > 0, self._open_dof_pos, self._close_dof_pos)
+            dof_pos_targets = torch.where(command >= 0, self._open_dof_pos, self._close_dof_pos)
            dof_pos_targets = dof_pos_targets * self._mimic_multiplier
            # store new command
            self._previous_dof_targets[:] = dof_pos_targets

--- a/source/extensions/omni.isaac.orbit/omni/isaac/orbit/objects/articulated/articulated_object.py
+++ b/source/extensions/omni.isaac.orbit/omni/isaac/orbit/objects/articulated/articulated_object.py
@@ -132,23 +132,11 @@ class ArticulatedObject:
        # TODO: What if prim already exists in the stage and spawn isn't called?
        # apply rigid body properties
-        kit_utils.set_nested_rigid_body_properties(
+        kit_utils.set_nested_rigid_body_properties(prim_path, **self.cfg.rigid_props.to_dict())
-            prim_path,
+        # apply collision properties
-            linear_damping=self.cfg.rigid_props.linear_damping,
+        kit_utils.set_nested_collision_properties(prim_path, **self.cfg.collision_props.to_dict())
-            angular_damping=self.cfg.rigid_props.angular_damping,
-            max_linear_velocity=self.cfg.rigid_props.max_linear_velocity,
-            max_angular_velocity=self.cfg.rigid_props.max_angular_velocity,
-            max_depenetration_velocity=self.cfg.rigid_props.max_depenetration_velocity,
-            disable_gravity=self.cfg.rigid_props.disable_gravity,
-            retain_accelerations=self.cfg.rigid_props.retain_accelerations,
-        )
        # articulation root settings
-        kit_utils.set_articulation_properties(
+        kit_utils.set_articulation_properties(prim_path, **self.cfg.articulation_props.to_dict())
-            prim_path,
-            enable_self_collisions=self.cfg.articulation_props.enable_self_collisions,
-            solver_position_iteration_count=self.cfg.articulation_props.solver_position_iteration_count,
-            solver_velocity_iteration_count=self.cfg.articulation_props.solver_velocity_iteration_count,
-        )
    def initialize(self, prim_paths_expr: Optional[str] = None):
        """Initializes the PhysX handles and internal buffers.

--- a/source/extensions/omni.isaac.orbit/omni/isaac/orbit/objects/articulated/articulated_object_cfg.py
+++ b/source/extensions/omni.isaac.orbit/omni/isaac/orbit/objects/articulated/articulated_object_cfg.py
@@ -33,16 +33,32 @@ class ArticulatedObjectCfg:
        angular_damping: Optional[float] = None
        """Angular damping coefficient."""
        max_linear_velocity: Optional[float] = 1000.0
-        """Maximum linear velocity for rigid bodies. Defaults to 1000.0."""
+        """Maximum linear velocity for rigid bodies (in m/s). Defaults to 1000.0."""
        max_angular_velocity: Optional[float] = 1000.0
-        """Maximum angular velocity for rigid bodies. Defaults to 1000.0."""
+        """Maximum angular velocity for rigid bodies (in rad/s). Defaults to 1000.0."""
        max_depenetration_velocity: Optional[float] = 10.0
-        """Maximum depenetration velocity permitted to be introduced by the solver. Defaults to 10.0."""
+        """Maximum depenetration velocity permitted to be introduced by the solver (in m/s).
+        Defaults to 10.0."""
        disable_gravity: Optional[bool] = False
        """Disable gravity for the actor. Defaults to False."""
        retain_accelerations: Optional[bool] = None
        """Carries over forces/accelerations over sub-steps."""
+    @configclass
+    class CollisionPropertiesCfg:
+        """Properties to apply to all collisions in the articulation."""
+        collision_enabled: Optional[bool] = None
+        """Whether to enable or disable collisions."""
+        contact_offset: Optional[float] = None
+        """Contact offset for the collision shape."""
+        rest_offset: Optional[float] = None
+        """Rest offset for the collision shape."""
+        torsional_patch_radius: Optional[float] = None
+        """Radius of the contact patch for applying torsional friction."""
+        min_torsional_patch_radius: Optional[float] = None
+        """Minimum radius of the contact patch for applying torsional friction."""
    @configclass
    class ArticulationRootPropertiesCfg:
        """Properties to apply to articulation."""
@@ -85,5 +101,7 @@ class ArticulatedObjectCfg:
    """Initial state of the articulated object."""
    rigid_props: RigidBodyPropertiesCfg = RigidBodyPropertiesCfg()
    """Properties to apply to all rigid bodies in the articulation."""
+    collision_props: CollisionPropertiesCfg = CollisionPropertiesCfg()
+    """Properties to apply to all collisions in the articulation."""
    articulation_props: ArticulationRootPropertiesCfg = ArticulationRootPropertiesCfg()
    """Properties to apply to articulation."""
--- a/source/extensions/omni.isaac.orbit/omni/isaac/orbit/objects/rigid/rigid_object.py
+++ b/source/extensions/omni.isaac.orbit/omni/isaac/orbit/objects/rigid/rigid_object.py
@@ -110,13 +110,9 @@ class RigidObject:
        # apply rigid body properties API
        RigidPrim(prim_path=prim_path)
        # -- set rigid body properties
-        kit_utils.set_nested_rigid_body_properties(
+        kit_utils.set_nested_rigid_body_properties(prim_path, **self.cfg.rigid_props.to_dict())
-            prim_path,
+        # apply collision properties
-            max_linear_velocity=self.cfg.rigid_props.max_linear_velocity,
+        kit_utils.set_nested_collision_properties(prim_path, **self.cfg.collision_props.to_dict())
-            max_angular_velocity=self.cfg.rigid_props.max_angular_velocity,
-            max_depenetration_velocity=self.cfg.rigid_props.max_depenetration_velocity,
-            disable_gravity=self.cfg.rigid_props.disable_gravity,
-        )
        # create physics material
        if self.cfg.physics_material is not None:
            # -- resolve material path

--- a/source/extensions/omni.isaac.orbit/omni/isaac/orbit/objects/rigid/rigid_object_cfg.py
+++ b/source/extensions/omni.isaac.orbit/omni/isaac/orbit/objects/rigid/rigid_object_cfg.py
@@ -26,15 +26,35 @@ class RigidObjectCfg:
    class RigidBodyPropertiesCfg:
        """Properties to apply to the rigid body."""
+        solver_position_iteration_count: Optional[int] = None
+        """Solver position iteration counts for the body."""
+        solver_velocity_iteration_count: Optional[int] = None
+        """Solver position iteration counts for the body."""
        max_linear_velocity: Optional[float] = 1000.0
-        """Maximum linear velocity for rigid bodies. Defaults to 1000.0."""
+        """Maximum linear velocity for rigid bodies (in m/s). Defaults to 1000.0."""
        max_angular_velocity: Optional[float] = 1000.0
-        """Maximum angular velocity for rigid bodies. Defaults to 1000.0."""
+        """Maximum angular velocity for rigid bodies (in rad/s). Defaults to 1000.0."""
        max_depenetration_velocity: Optional[float] = 10.0
-        """Maximum depenetration velocity permitted to be introduced by the solver. Defaults to 10.0."""
+        """Maximum depenetration velocity permitted to be introduced by the solver (in m/s).
+        Defaults to 10.0."""
        disable_gravity: Optional[bool] = False
        """Disable gravity for the actor. Defaults to False."""
+    @configclass
+    class CollisionPropertiesCfg:
+        """Properties to apply to all collisions in the articulation."""
+        collision_enabled: Optional[bool] = None
+        """Whether to enable or disable collisions."""
+        contact_offset: Optional[float] = None
+        """Contact offset for the collision shape."""
+        rest_offset: Optional[float] = None
+        """Rest offset for the collision shape."""
+        torsional_patch_radius: Optional[float] = None
+        """Radius of the contact patch for applying torsional friction."""
+        min_torsional_patch_radius: Optional[float] = None
+        """Minimum radius of the contact patch for applying torsional friction."""
    @configclass
    class PhysicsMaterialCfg:
        """Physics material applied to the rigid object."""
@@ -79,6 +99,8 @@ class RigidObjectCfg:
    """Initial state of the rigid object."""
    rigid_props: RigidBodyPropertiesCfg = RigidBodyPropertiesCfg()
    """Properties to apply to all rigid bodies in the object."""
+    collision_props: CollisionPropertiesCfg = CollisionPropertiesCfg()
+    """Properties to apply to all collisions in the articulation."""
    physics_material: Optional[PhysicsMaterialCfg] = PhysicsMaterialCfg()
    """Settings for the physics material to apply to the rigid object.

--- a/source/extensions/omni.isaac.orbit/omni/isaac/orbit/robots/config/anymal.py
+++ b/source/extensions/omni.isaac.orbit/omni/isaac/orbit/robots/config/anymal.py
@@ -90,6 +90,10 @@ ANYMAL_B_CFG = LeggedRobotCfg(
        max_angular_velocity=1000.0,
        max_depenetration_velocity=1.0,
    ),
+    collision_props=LeggedRobotCfg.CollisionPropertiesCfg(
+        contact_offset=0.02,
+        rest_offset=0.0,
+    ),
    articulation_props=LeggedRobotCfg.ArticulationRootPropertiesCfg(
        enable_self_collisions=True, solver_position_iteration_count=4, solver_velocity_iteration_count=1
    ),
@@ -141,6 +145,10 @@ ANYMAL_C_CFG = LeggedRobotCfg(
        max_angular_velocity=1000.0,
        max_depenetration_velocity=1.0,
    ),
+    collision_props=LeggedRobotCfg.CollisionPropertiesCfg(
+        contact_offset=0.02,
+        rest_offset=0.0,
+    ),
    articulation_props=LeggedRobotCfg.ArticulationRootPropertiesCfg(
        enable_self_collisions=True, solver_position_iteration_count=4, solver_velocity_iteration_count=1
    ),

--- a/source/extensions/omni.isaac.orbit/omni/isaac/orbit/robots/config/franka.py
+++ b/source/extensions/omni.isaac.orbit/omni/isaac/orbit/robots/config/franka.py
@@ -40,13 +40,23 @@ FRANKA_PANDA_ARM_WITH_PANDA_HAND_CFG = SingleArmManipulatorCfg(
            "panda_joint5": 0.0,
            "panda_joint6": 3.037,
            "panda_joint7": 0.741,
-            "panda_finger_joint*": 0.035,
+            "panda_finger_joint*": 0.04,
        },
        dof_vel={".*": 0.0},
    ),
    ee_info=SingleArmManipulatorCfg.EndEffectorFrameCfg(
        body_name="panda_hand", pos_offset=(0.0, 0.0, 0.1034), rot_offset=(1.0, 0.0, 0.0, 0.0)
    ),
+    rigid_props=SingleArmManipulatorCfg.RigidBodyPropertiesCfg(
+        max_depenetration_velocity=5.0,
+    ),
+    collision_props=SingleArmManipulatorCfg.CollisionPropertiesCfg(
+        contact_offset=0.005,
+        rest_offset=0.0,
+    ),
+    articulation_props=SingleArmManipulatorCfg.ArticulationRootPropertiesCfg(
+        enable_self_collisions=True,
+    ),
    actuator_groups={
        "panda_shoulder": ActuatorGroupCfg(
            dof_names=["panda_joint[1-4]"],

--- a/source/extensions/omni.isaac.orbit/omni/isaac/orbit/robots/config/unitree.py
+++ b/source/extensions/omni.isaac.orbit/omni/isaac/orbit/robots/config/unitree.py
@@ -54,6 +54,10 @@ UNITREE_A1_CFG = LeggedRobotCfg(
        max_angular_velocity=1000.0,
        max_depenetration_velocity=1.0,
    ),
+    collision_props=LeggedRobotCfg.CollisionPropertiesCfg(
+        contact_offset=0.02,
+        rest_offset=0.0,
+    ),
    articulation_props=LeggedRobotCfg.ArticulationRootPropertiesCfg(
        enable_self_collisions=True, solver_position_iteration_count=4, solver_velocity_iteration_count=1
    ),

--- a/source/extensions/omni.isaac.orbit/omni/isaac/orbit/robots/legged_robot/legged_robot.py
+++ b/source/extensions/omni.isaac.orbit/omni/isaac/orbit/robots/legged_robot/legged_robot.py
@@ -62,8 +62,6 @@ class LeggedRobot(RobotBase):
    def spawn(self, prim_path: str, translation: Sequence[float] = None, orientation: Sequence[float] = None):
        # spawn the robot and set its location
        super().spawn(prim_path, translation, orientation)
-        # alter physics collision properties
-        kit_utils.set_nested_collision_properties(prim_path, contact_offset=0.02, rest_offset=0.0)
        # add physics material to the feet bodies!
        if self.cfg.physics_material is not None:
            # -- resolve material path

--- a/source/extensions/omni.isaac.orbit/omni/isaac/orbit/robots/robot_base.py
+++ b/source/extensions/omni.isaac.orbit/omni/isaac/orbit/robots/robot_base.py
@@ -67,7 +67,7 @@ class RobotBase:
    @property
    def device(self) -> str:
        """Memory device for computation."""
-        return self.articulations._device
+        return self.articulations._device  # noqa: W0212
    @property
    def body_names(self) -> List[str]:
@@ -132,23 +132,11 @@ class RobotBase:
            carb.log_warn(f"A prim already exists at prim path: '{prim_path}'. Skipping...")
        # apply rigid body properties
-        kit_utils.set_nested_rigid_body_properties(
+        kit_utils.set_nested_rigid_body_properties(prim_path, **self.cfg.rigid_props.to_dict())
-            prim_path,
+        # apply collision properties
-            linear_damping=self.cfg.rigid_props.linear_damping,
+        kit_utils.set_nested_collision_properties(prim_path, **self.cfg.collision_props.to_dict())
-            angular_damping=self.cfg.rigid_props.angular_damping,
-            max_linear_velocity=self.cfg.rigid_props.max_linear_velocity,
-            max_angular_velocity=self.cfg.rigid_props.max_angular_velocity,
-            max_depenetration_velocity=self.cfg.rigid_props.max_depenetration_velocity,
-            disable_gravity=self.cfg.rigid_props.disable_gravity,
-            retain_accelerations=self.cfg.rigid_props.retain_accelerations,
-        )
        # articulation root settings
-        kit_utils.set_articulation_properties(
+        kit_utils.set_articulation_properties(prim_path, **self.cfg.articulation_props.to_dict())
-            prim_path,
-            enable_self_collisions=self.cfg.articulation_props.enable_self_collisions,
-            solver_position_iteration_count=self.cfg.articulation_props.solver_position_iteration_count,
-            solver_velocity_iteration_count=self.cfg.articulation_props.solver_velocity_iteration_count,
-        )
        # set spawned to true
        self._is_spawned = True
@@ -204,6 +192,7 @@ class RobotBase:
        # reset history
        self._previous_dof_vel[env_ids] = 0
        # TODO: Reset other cached variables.
+        self.articulations.set_joint_efforts(self._ZERO_JOINT_EFFORT, indices=self._ALL_INDICES)
        # reset actuators
        for group in self.actuator_groups.values():
            group.reset(env_ids)
@@ -220,11 +209,6 @@ class RobotBase:
        # slice actions per actuator group
        group_actions_dims = [group.control_dim for group in self.actuator_groups.values()]
        all_group_actions = torch.split(actions, group_actions_dims, dim=-1)
-        # silence the physics sim for warnings that make no sense :)
-        self.articulations._physics_sim_view.enable_warnings(False)
-        # note (18.08.2022): Saw a difference of up to 5 ms per step when using Isaac Sim
-        #   interfaces compared to direct PhysX calls. Thus, this function is optimized.
-        #   acquire tensors for whole robot
        # note: we use internal buffers to deal with the resets() as the buffers aren't forwarded
        #   unit the next simulation step.
        dof_pos = self._data.dof_pos
@@ -250,6 +234,11 @@ class RobotBase:
            self._data.gear_ratio[:, group.dof_indices] = group.gear_ratio
            if group.velocity_limit is not None:
                self._data.soft_dof_vel_limits[:, group.dof_indices] = group.velocity_limit
+        # silence the physics sim for warnings that make no sense :)
+        # note (18.08.2022): Saw a difference of up to 5 ms per step when using Isaac Sim
+        #   ArticulationView.apply_action() method compared to direct PhysX calls. Thus,
+        #   this function is optimized to apply actions for the whole robot.
+        self.articulations._physics_sim_view.enable_warnings(False)
        # apply actions into sim
        if self.sim_dof_control_modes["position"]:
            self.articulations._physics_view.set_dof_position_targets(self._data.dof_pos_targets, self._ALL_INDICES)
@@ -472,6 +461,7 @@ class RobotBase:
        self._previous_dof_vel = torch.zeros(self.count, self.num_dof, dtype=torch.float, device=self.device)
        # constants
        self._ALL_INDICES = torch.arange(self.count, dtype=torch.long, device=self.device)
+        self._ZERO_JOINT_EFFORT = torch.zeros(self.count, self.num_dof, dtype=torch.float, device=self.device)
        # -- frame states
        self._data.root_state_w = torch.zeros(self.count, 13, dtype=torch.float, device=self.device)

--- a/source/extensions/omni.isaac.orbit/omni/isaac/orbit/robots/robot_base_cfg.py
+++ b/source/extensions/omni.isaac.orbit/omni/isaac/orbit/robots/robot_base_cfg.py
@@ -33,16 +33,32 @@ class RobotBaseCfg:
        angular_damping: Optional[float] = None
        """Angular damping coefficient."""
        max_linear_velocity: Optional[float] = 1000.0
-        """Maximum linear velocity for rigid bodies. Defaults to 1000.0."""
+        """Maximum linear velocity for rigid bodies (in m/s). Defaults to 1000.0."""
        max_angular_velocity: Optional[float] = 1000.0
-        """Maximum angular velocity for rigid bodies. Defaults to 1000.0."""
+        """Maximum angular velocity for rigid bodies (in rad/s). Defaults to 1000.0."""
        max_depenetration_velocity: Optional[float] = 10.0
-        """Maximum depenetration velocity permitted to be introduced by the solver. Defaults to 10.0."""
+        """Maximum depenetration velocity permitted to be introduced by the solver (in m/s).
+        Defaults to 10.0."""
        disable_gravity: Optional[bool] = False
        """Disable gravity for the actor. Defaults to False."""
        retain_accelerations: Optional[bool] = None
        """Carries over forces/accelerations over sub-steps."""
+    @configclass
+    class CollisionPropertiesCfg:
+        """Properties to apply to all collisions in the articulation."""
+        collision_enabled: Optional[bool] = None
+        """Whether to enable or disable collisions."""
+        contact_offset: Optional[float] = None
+        """Contact offset for the collision shape."""
+        rest_offset: Optional[float] = None
+        """Rest offset for the collision shape."""
+        torsional_patch_radius: Optional[float] = None
+        """Radius of the contact patch for applying torsional friction."""
+        min_torsional_patch_radius: Optional[float] = None
+        """Minimum radius of the contact patch for applying torsional friction."""
    @configclass
    class ArticulationRootPropertiesCfg:
        """Properties to apply to articulation."""
@@ -85,6 +101,8 @@ class RobotBaseCfg:
    """Initial state of the robot."""
    rigid_props: RigidBodyPropertiesCfg = RigidBodyPropertiesCfg()
    """Properties to apply to all rigid bodies in the articulation."""
+    collision_props: CollisionPropertiesCfg = CollisionPropertiesCfg()
+    """Properties to apply to all collisions in the articulation."""
    articulation_props: ArticulationRootPropertiesCfg = ArticulationRootPropertiesCfg()
    """Properties to apply to articulation."""
    actuator_groups: Dict[str, ActuatorGroupCfg] = MISSING

--- a/source/extensions/omni.isaac.orbit/omni/isaac/orbit/robots/single_arm/single_arm.py
+++ b/source/extensions/omni.isaac.orbit/omni/isaac/orbit/robots/single_arm/single_arm.py
@@ -5,10 +5,13 @@
 import re
 import torch
-from typing import Dict, Optional
+from typing import Dict, Optional, Sequence
+from omni.isaac.core.materials import PhysicsMaterial
 from omni.isaac.core.prims import RigidPrimView
+from pxr import PhysxSchema
+import omni.isaac.orbit.utils.kit as kit_utils
 from omni.isaac.orbit.utils.math import combine_frame_transforms, quat_rotate_inverse, subtract_frame_transforms
 from ..robot_base import RobotBase
@@ -64,6 +67,31 @@ class SingleArmManipulator(RobotBase):
    Operations.
    """
+    def spawn(self, prim_path: str, translation: Sequence[float] = None, orientation: Sequence[float] = None):
+        # spawn the robot and set its location
+        super().spawn(prim_path, translation, orientation)
+        # alter physics collision properties
+        kit_utils.set_nested_collision_properties(prim_path, contact_offset=0.02, rest_offset=0.0)
+        # add physics material to the tool sites bodies!
+        if self.cfg.physics_material is not None and self.cfg.meta_info.tool_sites_names is not None:
+            # -- resolve material path
+            material_path = self.cfg.physics_material.prim_path
+            if not material_path.startswith("/"):
+                material_path = prim_path + "/" + material_path
+            # -- create material
+            material = PhysicsMaterial(
+                prim_path=material_path,
+                static_friction=self.cfg.physics_material.static_friction,
+                dynamic_friction=self.cfg.physics_material.dynamic_friction,
+                restitution=self.cfg.physics_material.restitution,
+            )
+            # -- enable patch-friction: yields better results!
+            physx_material_api = PhysxSchema.PhysxMaterialAPI.Apply(material.prim)
+            physx_material_api.CreateImprovePatchFrictionAttr().Set(True)
+            # -- bind material to feet
+            for site_name in self.cfg.meta_info.tool_sites_names:
+                kit_utils.apply_nested_physics_material(f"{prim_path}/{site_name}", material.prim_path)
    def initialize(self, prim_paths_expr: Optional[str] = None):
        # initialize parent handles
        super().initialize(prim_paths_expr)

--- a/source/extensions/omni.isaac.orbit/omni/isaac/orbit/robots/single_arm/single_arm_cfg.py
+++ b/source/extensions/omni.isaac.orbit/omni/isaac/orbit/robots/single_arm/single_arm_cfg.py
@@ -56,6 +56,24 @@ class SingleArmManipulatorCfg(RobotBaseCfg):
        enable_gravity: bool = False
        """Fill in generalized gravity forces into data buffers. Defaults to False."""
+    @configclass
+    class PhysicsMaterialCfg:
+        """Physics material applied to the tool sites of the robot."""
+        prim_path = "/World/Materials/toolMaterial"
+        """Path to the physics material prim. Defaults to /World/Materials/toolMaterial.
+        Note:
+            If the prim path is not absolute, it will be resolved relative to the path specified when spawning
+            the object.
+        """
+        static_friction: float = 1.0
+        """Static friction coefficient. Defaults to 1.0."""
+        dynamic_friction: float = 1.0
+        """Dynamic friction coefficient. Defaults to 1.0."""
+        restitution: float = 0.0
+        """Restitution coefficient. Defaults to 0.0."""
    ##
    # Initialize configurations.
    ##
@@ -66,3 +84,5 @@ class SingleArmManipulatorCfg(RobotBaseCfg):
    """Information about the end-effector frame location."""
    data_info: DataInfoCfg = DataInfoCfg()
    """Information about what all data to read from simulator."""
+    physics_material: PhysicsMaterialCfg = PhysicsMaterialCfg()
+    """Physics material applied to the tool sites of the robot."""
--- a/source/extensions/omni.isaac.orbit/omni/isaac/orbit/utils/kit.py
+++ b/source/extensions/omni.isaac.orbit/omni/isaac/orbit/utils/kit.py
@@ -322,14 +322,14 @@ def set_rigid_body_properties(
        solver_velocity_iteration_count (Optional[int]): Solver velocity iteration counts for the body.
        linear_damping (Optional[float]): Linear damping coefficient.
        angular_damping (Optional[float]): Angular damping coefficient.
-        max_linear_velocity (Optional[float]): Max allowable linear velocity for rigid body.
+        max_linear_velocity (Optional[float]): Max allowable linear velocity for rigid body (in m/s).
-        max_angular_velocity (Optional[float]): Max allowable angular velocity for rigid body.
+        max_angular_velocity (Optional[float]): Max allowable angular velocity for rigid body (in rad/s).
        sleep_threshold (Optional[float]): Mass-normalized kinetic energy threshold below which an actor
            may go to sleep.
        stabilization_threshold (Optional[float]): Mass-normalized kinetic energy threshold below which
            an actor may participate in stabilization.
        max_depenetration_velocity (Optional[float]): The maximum depenetration velocity permitted to
-            be introduced by the solver.
+            be introduced by the solver (in m/s).
        max_contact_impulse (Optional[float]): The limit on the impulse that may be applied at a contact.
        enable_gyroscopic_forces (Optional[bool]): Enables computation of gyroscopic forces on the
            rigid body.
@@ -408,12 +408,12 @@ def set_collision_properties(
    Args:
        prim_path (str): The prim path of parent.
        collision_enabled (Optional[bool], optional): Whether to enable/disable collider.
-        contact_offset (Optional[float], optional): Contact offset of a collision shape.
+        contact_offset (Optional[float], optional): Contact offset of a collision shape (in m).
-        rest_offset (Optional[float], optional): Rest offset of a collision shape.
+        rest_offset (Optional[float], optional): Rest offset of a collision shape (in m).
        torsional_patch_radius (Optional[float], optional): Defines the radius of the contact patch
-            used to apply torsional friction.
+            used to apply torsional friction (in m).
        min_torsional_patch_radius (Optional[float], optional): Defines the minimum radius of the
-            contact patch used to apply torsional friction.
+            contact patch used to apply torsional friction (in m).
    Raises:
        ValueError:  When no collision schema found at specified prim path.
@@ -536,14 +536,14 @@ def set_nested_rigid_body_properties(prim_path: str, **kwargs):
        solver_velocity_iteration_count (Optional[int]): Solver velocity iteration counts for the body.
        linear_damping (Optional[float]): Linear damping coefficient.
        angular_damping (Optional[float]): Angular damping coefficient.
-        max_linear_velocity (Optional[float]): Max allowable linear velocity for rigid body.
+        max_linear_velocity (Optional[float]): Max allowable linear velocity for rigid body (in m/s).
-        max_angular_velocity (Optional[float]): Max allowable angular velocity for rigid body.
+        max_angular_velocity (Optional[float]): Max allowable angular velocity for rigid body (in rad/s).
        sleep_threshold (Optional[float]): Mass-normalized kinetic energy threshold below which an actor
            may go to sleep.
        stabilization_threshold (Optional[float]): Mass-normalized kinetic energy threshold below which
            an actor may participate in stabilization.
        max_depenetration_velocity (Optional[float]): The maximum depenetration velocity permitted to
-            be introduced by the solver.
+            be introduced by the solver (in m/s).
        max_contact_impulse (Optional[float]): The limit on the impulse that may be applied at a contact.
        enable_gyroscopic_forces (Optional[bool]): Enables computation of gyroscopic forces on the
            rigid body.
@@ -575,12 +575,12 @@ def set_nested_collision_properties(prim_path: str, **kwargs):
    Keyword Args:
        collision_enabled (Optional[bool], optional): Whether to enable/disable collider.
-        contact_offset (Optional[float], optional): Contact offset of a collision shape.
+        contact_offset (Optional[float], optional): Contact offset of a collision shape (in m).
-        rest_offset (Optional[float], optional): Rest offset of a collision shape.
+        rest_offset (Optional[float], optional): Rest offset of a collision shape (in m).
        torsional_patch_radius (Optional[float], optional): Defines the radius of the contact patch
-            used to apply torsional friction.
+            used to apply torsional friction (in m).
        min_torsional_patch_radius (Optional[float], optional): Defines the minimum radius of the
-            contact patch used to apply torsional friction.
+            contact patch used to apply torsional friction (in m).
    """
    # get USD prim
    prim = prim_utils.get_prim_at_path(prim_path)

--- a/source/extensions/omni.isaac.orbit_envs/config/extension.toml
+++ b/source/extensions/omni.isaac.orbit_envs/config/extension.toml
 [package]
 # Note: Semantic Versioning is used: https://semver.org/
-version = "0.2.2"
+version = "0.2.3"
 # Description
 title = "ORBIT Environments"

--- a/source/extensions/omni.isaac.orbit_envs/data/rl_games/lift_ppo.yaml
+++ b/source/extensions/omni.isaac.orbit_envs/data/rl_games/lift_ppo.yaml
@@ -49,18 +49,18 @@ params:
    mixed_precision: False
    normalize_input: True
    normalize_value: True
-    value_bootstrap: False
+    value_bootstrap: True
    num_actors: -1
    reward_shaper:
-      scale_value: 0.01
+      scale_value: 1.0
    normalize_advantage: True
    gamma: 0.99
    tau: 0.95
-    learning_rate: 3e-4
+    learning_rate: 5e-4
    lr_schedule: adaptive
    schedule_type: legacy
    kl_threshold: 0.008
-    score_to_win: 100000000
+    score_to_win: 10000
    max_epochs: 5000
    save_best_after: 200
    save_frequency: 100
@@ -69,9 +69,9 @@ params:
    entropy_coef: 0.0
    truncate_grads: True
    e_clip: 0.2
-    horizon_length: 16
+    horizon_length: 32
    minibatch_size: 2048
-    mini_epochs: 8
+    mini_epochs: 5
    critic_coef: 4
    clip_value: True
    seq_len: 4

--- a/source/extensions/omni.isaac.orbit_envs/data/rsl_rl/lift_ppo.yaml
+++ b/source/extensions/omni.isaac.orbit_envs/data/rsl_rl/lift_ppo.yaml
@@ -32,8 +32,8 @@ algorithm:
 runner:
  policy_class_name: "ActorCritic"
  algorithm_class_name: "PPO"
-  num_steps_per_env: 192  # per iteration
+  num_steps_per_env: 96  # per iteration
-  max_iterations: 1500  # number of policy updates
+  max_iterations: 700  # number of policy updates
  # logging
  save_interval: 50  # check for potential saves every this many iterations

--- a/source/extensions/omni.isaac.orbit_envs/docs/CHANGELOG.rst
+++ b/source/extensions/omni.isaac.orbit_envs/docs/CHANGELOG.rst
 Changelog
 ---------
+0.2.3 (2023-03-06)
+~~~~~~~~~~~~~~~~~~
+Fixed
+^^^^^
+* Tuned the observations and rewards for ``Isaac-Lift-Franka-v0`` environment.
 0.2.2 (2023-03-04)
 ~~~~~~~~~~~~~~~~~~

--- a/source/extensions/omni.isaac.orbit_envs/omni/isaac/orbit_envs/manipulation/lift/lift_cfg.py
+++ b/source/extensions/omni.isaac.orbit_envs/omni/isaac/orbit_envs/manipulation/lift/lift_cfg.py
@@ -37,9 +37,11 @@ class ManipulationObjectCfg(RigidObjectCfg):
        pos=(0.4, 0.0, 0.075), rot=(1.0, 0.0, 0.0, 0.0), lin_vel=(0.0, 0.0, 0.0), ang_vel=(0.0, 0.0, 0.0)
    )
    rigid_props = RigidObjectCfg.RigidBodyPropertiesCfg(
+        solver_position_iteration_count=16,
+        solver_velocity_iteration_count=1,
        max_angular_velocity=1000.0,
        max_linear_velocity=1000.0,
-        max_depenetration_velocity=10.0,
+        max_depenetration_velocity=5.0,
        disable_gravity=False,
    )
    physics_material = RigidObjectCfg.PhysicsMaterialCfg(
@@ -84,8 +86,8 @@ class RandomizationCfg:
        position_cat: str = "default"  # randomize position: "default", "uniform"
        orientation_cat: str = "default"  # randomize position: "default", "uniform"
        # randomize position
-        position_uniform_min = [0.25, -0.25, 0.25]  # position (x,y,z)
+        position_uniform_min = [0.4, -0.25, 0.075]  # position (x,y,z)
-        position_uniform_max = [0.5, 0.25, 0.5]  # position (x,y,z)
+        position_uniform_max = [0.6, 0.25, 0.075]  # position (x,y,z)
    @configclass
    class ObjectDesiredPoseCfg:
@@ -96,8 +98,8 @@ class RandomizationCfg:
        orientation_cat: str = "default"  # randomize position: "default", "uniform"
        # randomize position
        position_default = [0.5, 0.0, 0.5]  # position default (x,y,z)
-        position_uniform_min = [0.25, -0.25, 0.25]  # position (x,y,z)
+        position_uniform_min = [0.4, -0.25, 0.25]  # position (x,y,z)
-        position_uniform_max = [0.5, 0.25, 0.5]  # position (x,y,z)
+        position_uniform_max = [0.6, 0.25, 0.5]  # position (x,y,z)
        # randomize orientation
        orientation_default = [1.0, 0.0, 0.0, 0.0]  # orientation default
@@ -117,13 +119,24 @@ class ObservationsCfg:
        # global group settings
        enable_corruption: bool = True
        # observation terms
-        arm_dof_pos_scaled = {"scale": 1.0, "noise": {"name": "uniform", "min": -0.01, "max": 0.01}}
+        # -- joint state
+        arm_dof_pos = {"scale": 1.0}
+        # arm_dof_pos_scaled = {"scale": 1.0}
+        # arm_dof_vel = {"scale": 0.5, "noise": {"name": "uniform", "min": -0.01, "max": 0.01}}
        tool_dof_pos_scaled = {"scale": 1.0}
-        arm_dof_vel = {"scale": 0.5, "noise": {"name": "uniform", "min": -0.1, "max": 0.1}}
+        # -- end effector state
-        tool_positions = {}
+        tool_positions = {"scale": 1.0}
-        object_positions = {}
+        tool_orientations = {"scale": 1.0}
-        object_desired_positions = {}
+        # -- object state
-        actions = {}
+        # object_positions = {"scale": 1.0}
+        # object_orientations = {"scale": 1.0}
+        object_relative_tool_positions = {"scale": 1.0}
+        # object_relative_tool_orientations = {"scale": 1.0}
+        # -- object desired state
+        object_desired_positions = {"scale": 1.0}
+        # -- previous action
+        arm_actions = {"scale": 1.0}
+        tool_actions = {"scale": 1.0}
    # global observation settings
    return_dict_obs_in_group = False
@@ -136,15 +149,20 @@ class ObservationsCfg:
 class RewardsCfg:
    """Reward terms for the MDP."""
-    # robot-centric
+    # -- robot-centric
-    reaching_object_position_l2 = {"weight": 0.0}
+    # reaching_object_position_l2 = {"weight": 0.0}
-    reaching_object_position_exp = {"weight": 2.5, "sigma": 0.25}
+    # reaching_object_position_exp = {"weight": 2.5, "sigma": 0.25}
-    penalizing_robot_dof_velocity_l2 = {"weight": 1e-4}
+    reaching_object_position_tanh = {"weight": 2.5, "sigma": 0.1}
-    penalizing_robot_dof_acceleration_l2 = {"weight": 1e-7}
+    # penalizing_arm_dof_velocity_l2 = {"weight": 1e-5}
-    penalizing_action_rate_l2 = {"weight": 1e-2}
+    # penalizing_tool_dof_velocity_l2 = {"weight": 1e-5}
-    # object-centric
+    # penalizing_robot_dof_acceleration_l2 = {"weight": 1e-7}
-    tracking_object_position_exp = {"weight": 2.5, "sigma": 0.5}
+    # -- action-centric
-    lifting_object_success = {"weight": 0.0, "threshold": 1e-3}
+    penalizing_arm_action_rate_l2 = {"weight": 1e-2}
+    # penalizing_tool_action_l2 = {"weight": 1e-2}
+    # -- object-centric
+    # tracking_object_position_exp = {"weight": 5.0, "sigma": 0.25, "threshold": 0.08}
+    tracking_object_position_tanh = {"weight": 5.0, "sigma": 0.2, "threshold": 0.08}
+    lifting_object_success = {"weight": 3.5, "threshold": 0.08}
 @configclass
@@ -184,15 +202,18 @@ class LiftEnvCfg(IsaacEnvCfg):
    """Configuration for the Lift environment."""
    # General Settings
-    env: EnvCfg = EnvCfg(num_envs=1024, env_spacing=2.5, episode_length_s=4.0)
+    env: EnvCfg = EnvCfg(num_envs=4096, env_spacing=2.5, episode_length_s=5.0)
    viewer: ViewerCfg = ViewerCfg(debug_vis=True, eye=(7.5, 7.5, 7.5), lookat=(0.0, 0.0, 0.0))
    # Physics settings
    sim: SimCfg = SimCfg(
-        dt=1.0 / 60.0,
+        dt=0.01,
        substeps=1,
        physx=PhysxCfg(
-            gpu_found_lost_aggregate_pairs_capacity=512 * 1024,
+            gpu_found_lost_aggregate_pairs_capacity=1024 * 1024 * 4,
-            gpu_total_aggregate_pairs_capacity=6 * 1024,
+            gpu_total_aggregate_pairs_capacity=16 * 1024,
+            friction_correlation_distance=0.00625,
+            friction_offset_threshold=0.01,
+            bounce_threshold_velocity=0.2,
        ),
    )

--- a/source/extensions/omni.isaac.orbit_envs/omni/isaac/orbit_envs/manipulation/lift/lift_env.py
+++ b/source/extensions/omni.isaac.orbit_envs/omni/isaac/orbit_envs/manipulation/lift/lift_env.py
@@ -16,7 +16,7 @@ from omni.isaac.orbit.markers import StaticMarker
 from omni.isaac.orbit.objects import RigidObject
 from omni.isaac.orbit.robots.single_arm import SingleArmManipulator
 from omni.isaac.orbit.utils.dict import class_to_dict
-from omni.isaac.orbit.utils.math import random_orientation, sample_uniform, scale_transform
+from omni.isaac.orbit.utils.math import quat_inv, quat_mul, random_orientation, sample_uniform, scale_transform
 from omni.isaac.orbit.utils.mdp import ObservationManager, RewardManager
 from omni.isaac.orbit_envs.isaac_env import IsaacEnv, VecEnvIndices, VecEnvObs
@@ -60,7 +60,9 @@ class LiftEnv(IsaacEnv):
        # compute the action space
        self.action_space = gym.spaces.Box(low=-1.0, high=1.0, shape=(self.num_actions,))
        print("[INFO]: Completed setting up the environment...")
        # Take an initial step to initialize the scene.
+        # This is required to compute quantities like Jacobians used in step().
        self.sim.step()
        # -- fill up buffers
        self.object.update_buffers(self.dt)
@@ -151,7 +153,7 @@ class LiftEnv(IsaacEnv):
            # offset actuator command with position offsets
            dof_pos_offset = self.robot.data.actuator_pos_offset
            self.robot_actions[:, : self.robot.arm_num_dof] -= dof_pos_offset[:, : self.robot.arm_num_dof]
-            # we assume last command is gripper action so don't change that
+            # we assume last command is tool action so don't change that
            self.robot_actions[:, -1] = self.actions[:, -1]
        elif self.cfg.control.control_type == "default":
            self.robot_actions[:] = self.actions
@@ -252,6 +254,8 @@ class LiftEnv(IsaacEnv):
        self.robot_actions = torch.zeros((self.num_envs, self.robot.num_actions), device=self.device)
        # commands
        self.object_des_pose_w = torch.zeros((self.num_envs, 7), device=self.device)
+        # buffers
+        self.object_root_pose_ee = torch.zeros((self.num_envs, 7), device=self.device)
        # time-step = 0
        self.object_init_pose_w = torch.zeros((self.num_envs, 7), device=self.device)
@@ -352,6 +356,10 @@ class LiftEnv(IsaacEnv):
 class LiftObservationManager(ObservationManager):
    """Reward manager for single-arm reaching environment."""
+    def arm_dof_pos(self, env: LiftEnv):
+        """DOF positions for the arm."""
+        return env.robot.data.arm_dof_pos
    def arm_dof_pos_scaled(self, env: LiftEnv):
        """DOF positions for the arm normalized to its max and min ranges."""
        return scale_transform(
@@ -376,17 +384,58 @@ class LiftObservationManager(ObservationManager):
        """Current end-effector position of the arm."""
        return env.robot.data.ee_state_w[:, :3] - env.envs_positions
+    def tool_orientations(self, env: LiftEnv):
+        """Current end-effector orientation of the arm."""
+        # make the first element positive
+        quat_w = env.robot.data.ee_state_w[:, 3:7]
+        quat_w[quat_w[:, 0] < 0] *= -1
+        return quat_w
    def object_positions(self, env: LiftEnv):
        """Current object position."""
        return env.object.data.root_pos_w - env.envs_positions
+    def object_orientations(self, env: LiftEnv):
+        """Current object orientation."""
+        # make the first element positive
+        quat_w = env.object.data.root_quat_w
+        quat_w[quat_w[:, 0] < 0] *= -1
+        return quat_w
+    def object_relative_tool_positions(self, env: LiftEnv):
+        """Current object position w.r.t. end-effector frame."""
+        return env.object.data.root_pos_w - env.robot.data.ee_state_w[:, :3]
+    def object_relative_tool_orientations(self, env: LiftEnv):
+        """Current object orientation w.r.t. end-effector frame."""
+        # compute the relative orientation
+        quat_ee = quat_mul(quat_inv(env.robot.data.ee_state_w[:, 3:7]), env.object.data.root_quat_w)
+        # make the first element positive
+        quat_ee[quat_ee[:, 0] < 0] *= -1
+        return quat_ee
    def object_desired_positions(self, env: LiftEnv):
        """Desired object position."""
        return env.object_des_pose_w[:, 0:3] - env.envs_positions
-    def actions(self, env: LiftEnv):
+    def object_desired_orientations(self, env: LiftEnv):
-        """Last actions provided to env."""
+        """Desired object orientation."""
-        return env.actions
+        # make the first element positive
+        quat_w = env.object_des_pose_w[:, 3:7]
+        quat_w[quat_w[:, 0] < 0] *= -1
+        return quat_w
+    def arm_actions(self, env: LiftEnv):
+        """Last arm actions provided to env."""
+        return env.actions[:, :-1]
+    def tool_actions(self, env: LiftEnv):
+        """Last tool actions provided to env."""
+        return env.actions[:, -1].unsqueeze(1)
+    def tool_actions_bool(self, env: LiftEnv):
+        """Last tool actions transformed to a boolean command."""
+        return torch.sign(env.actions[:, -1]).unsqueeze(1)
 class LiftRewardManager(RewardManager):
@@ -398,27 +447,53 @@ class LiftRewardManager(RewardManager):
    def reaching_object_position_exp(self, env: LiftEnv, sigma: float):
        """Penalize end-effector tracking position error using exp-kernel."""
-        return torch.sum(torch.square(env.robot.data.ee_state_w[:, 0:3] - env.object.data.root_pos_w), dim=1)
+        error = torch.sum(torch.square(env.robot.data.ee_state_w[:, 0:3] - env.object.data.root_pos_w), dim=1)
+        return torch.exp(-error / sigma)
-    def penalizing_robot_dof_velocity_l2(self, env: LiftEnv):
+    def reaching_object_position_tanh(self, env: LiftEnv, sigma: float):
+        """Penalize tool sites tracking position error using tanh-kernel."""
+        # distance of end-effector to the object: (num_envs,)
+        ee_distance = torch.norm(env.robot.data.ee_state_w[:, 0:3] - env.object.data.root_pos_w, dim=1)
+        # distance of the tool sites to the object: (num_envs, num_tool_sites)
+        object_root_pos = env.object.data.root_pos_w.unsqueeze(1)  # (num_envs, 1, 3)
+        tool_sites_distance = torch.norm(env.robot.data.tool_sites_state_w[:, :, :3] - object_root_pos, dim=-1)
+        # average distance of the tool sites to the object: (num_envs,)
+        # note: we add the ee distance to the average to make sure that the ee is always closer to the object
+        num_tool_sites = tool_sites_distance.shape[1]
+        average_distance = (ee_distance + torch.sum(tool_sites_distance, dim=1)) / (num_tool_sites + 1)
+        return 1 - torch.tanh(average_distance / sigma)
+    def penalizing_arm_dof_velocity_l2(self, env: LiftEnv):
        """Penalize large movements of the robot arm."""
        return -torch.sum(torch.square(env.robot.data.arm_dof_vel), dim=1)
-    def penalizing_robot_dof_acceleration_l2(self, env: LiftEnv):
+    def penalizing_tool_dof_velocity_l2(self, env: LiftEnv):
-        """Penalize fast movements of the robot arm."""
+        """Penalize large movements of the robot tool."""
-        return -torch.sum(torch.square(env.robot.data.dof_acc), dim=1)
+        return -torch.sum(torch.square(env.robot.data.tool_dof_vel), dim=1)
+    def penalizing_arm_action_rate_l2(self, env: LiftEnv):
+        """Penalize large variations in action commands besides tool."""
+        return -torch.sum(torch.square(env.actions[:, :-1] - env.previous_actions[:, :-1]), dim=1)
-    def penalizing_action_rate_l2(self, env: LiftEnv):
+    def penalizing_tool_action_l2(self, env: LiftEnv):
-        """Penalize large variations in action commands."""
+        """Penalize large values in action commands for the tool."""
-        return -torch.sum(torch.square(env.actions - env.previous_actions), dim=1)
+        return -torch.square(env.actions[:, -1])
-    def tracking_object_position_exp(self, env: LiftEnv, sigma: float):
+    def tracking_object_position_exp(self, env: LiftEnv, sigma: float, threshold: float):
        """Penalize tracking object position error using exp-kernel."""
-        initial_error = torch.sum(torch.square(env.object_des_pose_w[:, 0:3] - env.object_init_pose_w[:, 0:3]), dim=1)
+        # distance of the end-effector to the object: (num_envs,)
-        current_error = torch.sum(torch.square(env.object_des_pose_w[:, 0:3] - env.object.data.root_pos_w), dim=1)
+        error = torch.sum(torch.square(env.object_des_pose_w[:, 0:3] - env.object.data.root_pos_w), dim=1)
-        return torch.exp(-current_error / sigma) - torch.exp(-initial_error / sigma)
+        # rewarded if the object is lifted above the threshold
+        return (env.object.data.root_pos_w[:, 2] > threshold) * torch.exp(-error / sigma)
+    def tracking_object_position_tanh(self, env: LiftEnv, sigma: float, threshold: float):
+        """Penalize tracking object position error using tanh-kernel."""
+        # distance of the end-effector to the object: (num_envs,)
+        distance = torch.norm(env.object_des_pose_w[:, 0:3] - env.object.data.root_pos_w, dim=1)
+        # rewarded if the object is lifted above the threshold
+        return (env.object.data.root_pos_w[:, 2] > threshold) * (1 - torch.tanh(distance / sigma))
    def lifting_object_success(self, env: LiftEnv, threshold: float):
        """Sparse reward if object is lifted successfully."""
-        error = torch.sum(torch.square(env.object.data.root_pos_w - env.object_des_pose_w[:, 0:3]), dim=1)
+        return torch.where(env.object.data.root_pos_w[:, 2] > threshold, 1.0, 0.0)
-        return torch.where(error < threshold, 1.0, 0.0)
--- a/source/standalone/environments/state_machine/play_lift.py
+++ b/source/standalone/environments/state_machine/play_lift.py