Improves behavior for Franka Cabinet environment (#909)

# Description

Isaac-Franka-Cabinet-Direct-v0 sometimes had unstable behaviors with trained policies where the robot wasn't always able to achieve the task. This change makes slight modifications to the reward function to improve the behavior for the policy.

## Type of change

- Bug fix (non-breaking change which fixes an issue)

## Checklist

- [x] I have run the [`pre-commit` checks](https://pre-commit.com/) with `./isaaclab.sh --format`
- [ ] I have made corresponding changes to the documentation
- [x] My changes generate no new warnings
- [ ] I have added tests that prove my fix is effective or that my feature works
- [ ] I have updated the changelog and the corresponding version in the extension's `config/extension.toml` file
- [ ] I have added my name to the `CONTRIBUTORS.md` or my name already exists there

Improves behavior for Franka Cabinet environment (#909)
# Description

Isaac-Franka-Cabinet-Direct-v0 sometimes had unstable behaviors with trained policies where the robot wasn't always able to achieve the task. This change makes slight modifications to the reward function to improve the behavior for the policy.

## Type of change

- Bug fix (non-breaking change which fixes an issue)

## Checklist

- [x] I have run the [`pre-commit` checks](https://pre-commit.com/) with `./isaaclab.sh --format`
- [ ] I have made corresponding changes to the documentation
- [x] My changes generate no new warnings
- [ ] I have added tests that prove my fix is effective or that my feature works
- [ ] I have updated the changelog and the corresponding version in the extension's `config/extension.toml` file
- [ ] I have added my name to the `CONTRIBUTORS.md` or my name already exists there
a861d576 · Kelly Guo · GitHub · 7b3ca4ff · a861d576 · a861d576
Unverified Commit a861d576 authored Sep 03, 2024 by Kelly Guo Committed by GitHub Sep 03, 2024
Hide whitespace changes
Inline Side-by-side

Showing with 24 additions and 53 deletions

rsl_rl_ppo_cfg.py .../lab_tasks/direct/franka_cabinet/agents/rsl_rl_ppo_cfg.py +1 -1

franka_cabinet_env.py ...aac/lab_tasks/direct/franka_cabinet/franka_cabinet_env.py +23 -52

No files found.
--- a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/franka_cabinet/agents/rsl_rl_ppo_cfg.py
+++ b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/franka_cabinet/agents/rsl_rl_ppo_cfg.py
@@ -18,7 +18,7 @@ class FrankaCabinetPPORunnerCfg(RslRlOnPolicyRunnerCfg):
    max_iterations = 1500
    save_interval = 50
    experiment_name = "franka_cabinet_direct"
-    empirical_normalization = False
+    empirical_normalization = True
    policy = RslRlPpoActorCriticCfg(
        init_noise_std=1.0,
        actor_hidden_dims=[256, 128, 64],

--- a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/franka_cabinet/franka_cabinet_env.py
+++ b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/franka_cabinet/franka_cabinet_env.py
@@ -155,13 +155,11 @@ class FrankaCabinetEnvCfg(DirectRLEnvCfg):
    dof_velocity_scale = 0.1
    # reward scales
-    dist_reward_scale = 2.0
+    dist_reward_scale = 1.5
-    rot_reward_scale = 0.5
+    rot_reward_scale = 1.5
-    around_handle_reward_scale = 0.0
+    open_reward_scale = 10.0
-    open_reward_scale = 7.5
+    action_penalty_scale = 0.05
-    action_penalty_scale = 0.01
+    finger_reward_scale = 2.0
-    finger_dist_reward_scale = 0.0
-    finger_close_reward_scale = 10.0
 class FrankaCabinetEnv(DirectRLEnv):
@@ -320,12 +318,10 @@ class FrankaCabinetEnv(DirectRLEnv):
            self.num_envs,
            self.cfg.dist_reward_scale,
            self.cfg.rot_reward_scale,
-            self.cfg.around_handle_reward_scale,
            self.cfg.open_reward_scale,
-            self.cfg.finger_dist_reward_scale,
            self.cfg.action_penalty_scale,
+            self.cfg.finger_reward_scale,
            self._robot.data.joint_pos,
-            self.cfg.finger_close_reward_scale,
        )
    def _reset_idx(self, env_ids: torch.Tensor | None):
@@ -413,12 +409,10 @@ class FrankaCabinetEnv(DirectRLEnv):
        num_envs,
        dist_reward_scale,
        rot_reward_scale,
-        around_handle_reward_scale,
        open_reward_scale,
-        finger_dist_reward_scale,
        action_penalty_scale,
+        finger_reward_scale,
        joint_positions,
-        finger_close_reward_scale,
    ):
        # distance from hand to the drawer
        d = torch.norm(franka_grasp_pos - drawer_grasp_pos, p=2, dim=-1)
@@ -440,64 +434,41 @@ class FrankaCabinetEnv(DirectRLEnv):
        # reward for matching the orientation of the hand to the drawer (fingers wrapped)
        rot_reward = 0.5 * (torch.sign(dot1) * dot1**2 + torch.sign(dot2) * dot2**2)
-        # bonus if left finger is above the drawer handle and right below
-        around_handle_reward = torch.zeros_like(rot_reward)
-        around_handle_reward = torch.where(
-            franka_lfinger_pos[:, 2] > drawer_grasp_pos[:, 2],
-            torch.where(
-                franka_rfinger_pos[:, 2] < drawer_grasp_pos[:, 2], around_handle_reward + 0.5, around_handle_reward
-            ),
-            around_handle_reward,
-        )
-        # reward for distance of each finger from the drawer
-        finger_dist_reward = torch.zeros_like(rot_reward)
-        lfinger_dist = torch.abs(franka_lfinger_pos[:, 2] - drawer_grasp_pos[:, 2])
-        rfinger_dist = torch.abs(franka_rfinger_pos[:, 2] - drawer_grasp_pos[:, 2])
-        finger_dist_reward = torch.where(
-            franka_lfinger_pos[:, 2] > drawer_grasp_pos[:, 2],
-            torch.where(
-                franka_rfinger_pos[:, 2] < drawer_grasp_pos[:, 2],
-                (0.04 - lfinger_dist) + (0.04 - rfinger_dist),
-                finger_dist_reward,
-            ),
-            finger_dist_reward,
-        )
-        finger_close_reward = torch.zeros_like(rot_reward)
-        finger_close_reward = torch.where(
-            d <= 0.03, (0.04 - joint_positions[:, 7]) + (0.04 - joint_positions[:, 8]), finger_close_reward
-        )
        # regularization on the actions (summed for each environment)
        action_penalty = torch.sum(actions**2, dim=-1)
        # how far the cabinet has been opened out
-        open_reward = cabinet_dof_pos[:, 3] * around_handle_reward + cabinet_dof_pos[:, 3]  # drawer_top_joint
+        open_reward = cabinet_dof_pos[:, 3]  # drawer_top_joint
+        # penalty for distance of each finger from the drawer handle
+        lfinger_dist = franka_lfinger_pos[:, 2] - drawer_grasp_pos[:, 2]
+        rfinger_dist = drawer_grasp_pos[:, 2] - franka_rfinger_pos[:, 2]
+        finger_dist_penalty = torch.zeros_like(lfinger_dist)
+        finger_dist_penalty += torch.where(lfinger_dist < 0, lfinger_dist, torch.zeros_like(lfinger_dist))
+        finger_dist_penalty += torch.where(rfinger_dist < 0, rfinger_dist, torch.zeros_like(rfinger_dist))
        rewards = (
            dist_reward_scale * dist_reward
            + rot_reward_scale * rot_reward
-            + around_handle_reward_scale * around_handle_reward
            + open_reward_scale * open_reward
-            + finger_dist_reward_scale * finger_dist_reward
+            + finger_reward_scale * finger_dist_penalty
            - action_penalty_scale * action_penalty
-            + finger_close_reward * finger_close_reward_scale
        )
        self.extras["log"] = {
            "dist_reward": (dist_reward_scale * dist_reward).mean(),
            "rot_reward": (rot_reward_scale * rot_reward).mean(),
-            "around_handle_reward": (around_handle_reward_scale * around_handle_reward).mean(),
            "open_reward": (open_reward_scale * open_reward).mean(),
-            "finger_dist_reward": (finger_dist_reward_scale * finger_dist_reward).mean(),
+            "action_penalty": (-action_penalty_scale * action_penalty).mean(),
-            "action_penalty": (action_penalty_scale * action_penalty).mean(),
+            "left_finger_distance_reward": (finger_reward_scale * lfinger_dist).mean(),
-            "finger_close_reward": (finger_close_reward * finger_close_reward_scale).mean(),
+            "right_finger_distance_reward": (finger_reward_scale * rfinger_dist).mean(),
+            "finger_dist_penalty": (finger_reward_scale * finger_dist_penalty).mean(),
        }
        # bonus for opening drawer properly
-        rewards = torch.where(cabinet_dof_pos[:, 3] > 0.01, rewards + 0.5, rewards)
+        rewards = torch.where(cabinet_dof_pos[:, 3] > 0.01, rewards + 0.25, rewards)
-        rewards = torch.where(cabinet_dof_pos[:, 3] > 0.2, rewards + around_handle_reward, rewards)
+        rewards = torch.where(cabinet_dof_pos[:, 3] > 0.2, rewards + 0.25, rewards)
-        rewards = torch.where(cabinet_dof_pos[:, 3] > 0.39, rewards + (2.0 * around_handle_reward), rewards)
+        rewards = torch.where(cabinet_dof_pos[:, 3] > 0.35, rewards + 0.25, rewards)
        return rewards