Unverified commit db73f9d4, authored by Mayank Mittal and committed by GitHub

Fixes configurations for Stable-Baselines3 workflow (#61)

* fixes sb3 configs for missing agent seed
* fixes encoding issue when loading yaml configs
* adds launch.json task for running environment
parent 054974bf
...@@ -24,5 +24,19 @@ ...@@ -24,5 +24,19 @@
"port": 3000, "port": 3000,
"host": "localhost" "host": "localhost"
}, },
{
"name": "Python: Run Environment",
"type": "python",
"request": "launch",
"args" : ["--task", "Isaac-Reach-Franka-v0"],
"program": "${file}",
"console": "integratedTerminal",
"env": {
"EXP_PATH": "${workspaceFolder}/_isaac_sim/apps",
"RESOURCE_NAME": "IsaacSim"
},
"envFile": "${workspaceFolder}/.vscode/.python.env",
"preLaunchTask": "setup_python_env"
},
] ]
} }
...@@ -329,7 +329,7 @@ def axis_angle_from_quat(quat: torch.Tensor, eps: float = 1.0e-6) -> torch.Tenso ...@@ -329,7 +329,7 @@ def axis_angle_from_quat(quat: torch.Tensor, eps: float = 1.0e-6) -> torch.Tenso
# Thus, axis-angle is [q_x, q_y, q_z] / (sin(theta/2) / theta) # Thus, axis-angle is [q_x, q_y, q_z] / (sin(theta/2) / theta)
# When theta = 0, (sin(theta/2) / theta) is undefined # When theta = 0, (sin(theta/2) / theta) is undefined
# However, as theta --> 0, we can use the Taylor approximation 1/2 - theta^2 / 48 # However, as theta --> 0, we can use the Taylor approximation 1/2 - theta^2 / 48
quat = quat * (1.0 - 2. * (quat[..., 0:1] < 0.0)) quat = quat * (1.0 - 2.0 * (quat[..., 0:1] < 0.0))
mag = torch.linalg.norm(quat[..., 1:], dim=1) mag = torch.linalg.norm(quat[..., 1:], dim=1)
half_angle = torch.atan2(mag, quat[..., 0]) half_angle = torch.atan2(mag, quat[..., 0])
angle = 2.0 * half_angle angle = 2.0 * half_angle
......
# Reference: https://github.com/DLR-RM/rl-baselines3-zoo/blob/master/hyperparams/ppo.yml#L161 # Reference: https://github.com/DLR-RM/rl-baselines3-zoo/blob/master/hyperparams/ppo.yml#L161
seed: 42
n_timesteps: !!float 2e6 n_timesteps: !!float 2e6
policy: 'MlpPolicy' policy: 'MlpPolicy'
......
# Reference: https://github.com/DLR-RM/rl-baselines3-zoo/blob/master/hyperparams/ppo.yml#L32 # Reference: https://github.com/DLR-RM/rl-baselines3-zoo/blob/master/hyperparams/ppo.yml#L32
# 512×1500×64
seed: 42 seed: 42
n_timesteps: !!float 49152000
# 512×1500×64
n_timesteps: 49152000
policy: 'MlpPolicy' policy: 'MlpPolicy'
n_steps: 64 n_steps: 64
batch_size: 4096 batch_size: 4096
......
# Reference: https://github.com/DLR-RM/rl-baselines3-zoo/blob/master/hyperparams/ppo.yml#L32 # Reference: https://github.com/DLR-RM/rl-baselines3-zoo/blob/master/hyperparams/ppo.yml#L32
seed: 67854 seed: 42
# 512×500×16 # 512×500×16
n_timesteps: 4096000 n_timesteps: 4096000
......
# Reference: https://github.com/DLR-RM/rl-baselines3-zoo/blob/master/hyperparams/ppo.yml#L457 # Reference: https://github.com/DLR-RM/rl-baselines3-zoo/blob/master/hyperparams/ppo.yml#L457
seed: 42
policy: 'MlpPolicy' policy: 'MlpPolicy'
n_timesteps: !!float 1e7 n_timesteps: !!float 1e7
......
# Reference: https://github.com/DLR-RM/rl-baselines3-zoo/blob/master/hyperparams/ppo.yml#L32 # Reference: https://github.com/DLR-RM/rl-baselines3-zoo/blob/master/hyperparams/ppo.yml#L32
seed: 42
# epoch * n_steps * nenvs: 500×512*8*8 # epoch * n_steps * nenvs: 500×512*8*8
n_timesteps: !!float 16384000 n_timesteps: 16384000
policy: 'MlpPolicy' policy: 'MlpPolicy'
n_steps: 64 n_steps: 64
# mini batch size: num_envs * nsteps / nminibatches 2048×512÷2048 # mini batch size: num_envs * nsteps / nminibatches 2048×512÷2048
...@@ -16,13 +18,9 @@ policy_kwargs: "dict( ...@@ -16,13 +18,9 @@ policy_kwargs: "dict(
activation_fn=nn.ELU, activation_fn=nn.ELU,
net_arch=[32, 32, dict(pi=[256, 128, 64], vf=[256, 128, 64])] net_arch=[32, 32, dict(pi=[256, 128, 64], vf=[256, 128, 64])]
)" )"
target_kl: 0.01 target_kl: 0.01
max_grad_norm: 1.0 max_grad_norm: 1.0
# # Uses VecNormalize class to normalize obs # # Uses VecNormalize class to normalize obs
# normalize_input: True # normalize_input: True
# # Uses VecNormalize class to normalize rew # # Uses VecNormalize class to normalize rew
......
...@@ -41,7 +41,7 @@ def parse_rlg_cfg(task_name) -> dict: ...@@ -41,7 +41,7 @@ def parse_rlg_cfg(task_name) -> dict:
raise ValueError(f"Task not found: {task_name}") raise ValueError(f"Task not found: {task_name}")
# parse agent configuration # parse agent configuration
with open(config_file) as f: with open(config_file, encoding="utf-8") as f:
cfg = yaml.load(f, Loader=yaml.Loader) cfg = yaml.load(f, Loader=yaml.Loader)
return cfg return cfg
...@@ -42,7 +42,7 @@ def parse_rslrl_cfg(task_name) -> dict: ...@@ -42,7 +42,7 @@ def parse_rslrl_cfg(task_name) -> dict:
raise ValueError(f"Task not found: {task_name}") raise ValueError(f"Task not found: {task_name}")
# parse agent configuration # parse agent configuration
with open(config_file) as f: with open(config_file, encoding="utf-8") as f:
cfg = yaml.load(f, Loader=yaml.FullLoader) cfg = yaml.load(f, Loader=yaml.FullLoader)
return cfg return cfg
...@@ -40,7 +40,7 @@ def parse_sb3_cfg(task_name) -> dict: ...@@ -40,7 +40,7 @@ def parse_sb3_cfg(task_name) -> dict:
raise ValueError(f"Task not found: {task_name}. Configurations exist for {SB3_PPO_CONFIG_FILE.keys()}") raise ValueError(f"Task not found: {task_name}. Configurations exist for {SB3_PPO_CONFIG_FILE.keys()}")
# parse agent configuration # parse agent configuration
with open(config_file) as f: with open(config_file, encoding="utf-8") as f:
cfg = yaml.load(f, Loader=yaml.FullLoader) cfg = yaml.load(f, Loader=yaml.FullLoader)
# check config is valid # check config is valid
if cfg is None: if cfg is None:
......
...@@ -48,7 +48,7 @@ def parse_skrl_cfg(task_name) -> dict: ...@@ -48,7 +48,7 @@ def parse_skrl_cfg(task_name) -> dict:
raise ValueError(f"Task not found: {task_name}") raise ValueError(f"Task not found: {task_name}")
# parse agent configuration # parse agent configuration
with open(config_file) as f: with open(config_file, encoding="utf-8") as f:
cfg = yaml.load(f, Loader=yaml.Loader) cfg = yaml.load(f, Loader=yaml.Loader)
return cfg return cfg
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment