Unverified commit db73f9d4, authored by Mayank Mittal and committed by GitHub

Fixes configurations for Stable-Baselines3 workflow (#61)

* fixes sb3 configs for missing agent seed
* fixes encoding issue when loading yaml configs
* adds launch.json task for running environment
parent 054974bf
......@@ -24,5 +24,19 @@
"port": 3000,
"host": "localhost"
},
{
"name": "Python: Run Environment",
"type": "python",
"request": "launch",
"args" : ["--task", "Isaac-Reach-Franka-v0"],
"program": "${file}",
"console": "integratedTerminal",
"env": {
"EXP_PATH": "${workspaceFolder}/_isaac_sim/apps",
"RESOURCE_NAME": "IsaacSim"
},
"envFile": "${workspaceFolder}/.vscode/.python.env",
"preLaunchTask": "setup_python_env"
},
]
}
......@@ -329,7 +329,7 @@ def axis_angle_from_quat(quat: torch.Tensor, eps: float = 1.0e-6) -> torch.Tenso
# Thus, axis-angle is [q_x, q_y, q_z] / (sin(theta/2) / theta)
# When theta = 0, (sin(theta/2) / theta) is undefined
# However, as theta --> 0, we can use the Taylor approximation 1/2 - theta^2 / 48
quat = quat * (1.0 - 2. * (quat[..., 0:1] < 0.0))
quat = quat * (1.0 - 2.0 * (quat[..., 0:1] < 0.0))
mag = torch.linalg.norm(quat[..., 1:], dim=1)
half_angle = torch.atan2(mag, quat[..., 0])
angle = 2.0 * half_angle
......
# Reference: https://github.com/DLR-RM/rl-baselines3-zoo/blob/master/hyperparams/ppo.yml#L161
seed: 42
n_timesteps: !!float 2e6
policy: 'MlpPolicy'
......
# Reference: https://github.com/DLR-RM/rl-baselines3-zoo/blob/master/hyperparams/ppo.yml#L32
# 512×1500×64
seed: 42
n_timesteps: !!float 49152000
# 512×1500×64
n_timesteps: 49152000
policy: 'MlpPolicy'
n_steps: 64
batch_size: 4096
......
# Reference: https://github.com/DLR-RM/rl-baselines3-zoo/blob/master/hyperparams/ppo.yml#L32
seed: 67854
seed: 42
# 512×500×16
n_timesteps: 4096000
......
# Reference: https://github.com/DLR-RM/rl-baselines3-zoo/blob/master/hyperparams/ppo.yml#L457
seed: 42
policy: 'MlpPolicy'
n_timesteps: !!float 1e7
......
# Reference: https://github.com/DLR-RM/rl-baselines3-zoo/blob/master/hyperparams/ppo.yml#L32
seed: 42
# epoch * nenvs * n_steps: 500×512×(8×8)
n_timesteps: !!float 16384000
n_timesteps: 16384000
policy: 'MlpPolicy'
n_steps: 64
# mini batch size: num_envs * nsteps / nminibatches 2048×512÷2048
......@@ -16,13 +18,9 @@ policy_kwargs: "dict(
activation_fn=nn.ELU,
net_arch=[32, 32, dict(pi=[256, 128, 64], vf=[256, 128, 64])]
)"
target_kl: 0.01
max_grad_norm: 1.0
# # Uses VecNormalize class to normalize obs
# normalize_input: True
# # Uses VecNormalize class to normalize rew
......
......@@ -41,7 +41,7 @@ def parse_rlg_cfg(task_name) -> dict:
raise ValueError(f"Task not found: {task_name}")
# parse agent configuration
with open(config_file) as f:
with open(config_file, encoding="utf-8") as f:
cfg = yaml.load(f, Loader=yaml.Loader)
return cfg
......@@ -42,7 +42,7 @@ def parse_rslrl_cfg(task_name) -> dict:
raise ValueError(f"Task not found: {task_name}")
# parse agent configuration
with open(config_file) as f:
with open(config_file, encoding="utf-8") as f:
cfg = yaml.load(f, Loader=yaml.FullLoader)
return cfg
......@@ -40,7 +40,7 @@ def parse_sb3_cfg(task_name) -> dict:
raise ValueError(f"Task not found: {task_name}. Configurations exist for {SB3_PPO_CONFIG_FILE.keys()}")
# parse agent configuration
with open(config_file) as f:
with open(config_file, encoding="utf-8") as f:
cfg = yaml.load(f, Loader=yaml.FullLoader)
# check config is valid
if cfg is None:
......
......@@ -48,7 +48,7 @@ def parse_skrl_cfg(task_name) -> dict:
raise ValueError(f"Task not found: {task_name}")
# parse agent configuration
with open(config_file) as f:
with open(config_file, encoding="utf-8") as f:
cfg = yaml.load(f, Loader=yaml.Loader)
return cfg
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment