Adds support for module:task and updates gymnasium to >=1.0 (#2467)

# Description

Gymnasium 1.0 introduced support for specifying a task as `module:task`, which imports the module automatically instead of requiring task modules to be pre-imported. This PR adds support for this feature and enforces the gymnasium version to be >= 1.0.

## Type of change

- New feature (non-breaking change which adds functionality)

## Checklist

- [x] I have run the [`pre-commit` checks](https://pre-commit.com/) with `./isaaclab.sh --format`
- [x] I have made corresponding changes to the documentation
- [x] My changes generate no new warnings
- [ ] I have added tests that prove my fix is effective or that my feature works
- [ ] I have updated the changelog and the corresponding version in the extension's `config/extension.toml` file
- [ ] I have added my name to the `CONTRIBUTORS.md` or my name already exists there

---------

Signed-off-by: Kelly Guo <kellyg@nvidia.com>

Adds support for module:task and updates gymnasium to >=1.0 (#2467)
# Description

Gymnasium 1.0 introduced support for specifying a task as `module:task`, which imports the module automatically instead of requiring task modules to be pre-imported. This PR adds support for this feature and enforces the gymnasium version to be >= 1.0.

## Type of change

- New feature (non-breaking change which adds functionality)

## Checklist

- [x] I have run the [`pre-commit` checks](https://pre-commit.com/) with `./isaaclab.sh --format`
- [x] I have made corresponding changes to the documentation
- [x] My changes generate no new warnings
- [ ] I have added tests that prove my fix is effective or that my feature works
- [ ] I have updated the changelog and the corresponding version in the extension's `config/extension.toml` file
- [ ] I have added my name to the `CONTRIBUTORS.md` or my name already exists there

---------

Signed-off-by: Kelly Guo <kellyg@nvidia.com>
9f1aa4cd · Kelly Guo · GitHub · d63e58f9 · 9f1aa4cd · 9f1aa4cd
Unverified Commit 9f1aa4cd authored Jun 06, 2025 by Kelly Guo Committed by GitHub Jun 06, 2025
15 changed files
--- a/scripts/imitation_learning/isaaclab_mimic/annotate_demos.py
+++ b/scripts/imitation_learning/isaaclab_mimic/annotate_demos.py
@@ -174,13 +174,13 @@ def main():
        os.makedirs(output_dir)
    if args_cli.task is not None:
-        env_name = args_cli.task
+        env_name = args_cli.task.split(":")[-1]
    if env_name is None:
        raise ValueError("Task/env name was not specified nor found in the dataset.")
    env_cfg = parse_env_cfg(env_name, device=args_cli.device, num_envs=1)
-    env_cfg.env_name = args_cli.task
+    env_cfg.env_name = env_name
    # extract success checking function to invoke manually
    success_term = None

--- a/scripts/imitation_learning/isaaclab_mimic/consolidated_demo.py
+++ b/scripts/imitation_learning/isaaclab_mimic/consolidated_demo.py
@@ -366,7 +366,7 @@ def main():
    # get the environment name
    if args_cli.task is not None:
-        env_name = args_cli.task
+        env_name = args_cli.task.split(":")[-1]
    elif args_cli.input_file:
        # if the environment name is not specified, try to get it from the dataset file
        dataset_file_handler = HDF5DatasetFileHandler()
@@ -406,7 +406,7 @@ def main():
        env_cfg.recorders.dataset_export_mode = DatasetExportMode.EXPORT_SUCCEEDED_ONLY
    # create environment
-    env = gym.make(env_name, cfg=env_cfg)
+    env = gym.make(args_cli.task, cfg=env_cfg)
    if not isinstance(env.unwrapped, ManagerBasedRLMimicEnv):
        raise ValueError("The environment should be derived from ManagerBasedRLMimicEnv")

--- a/scripts/imitation_learning/isaaclab_mimic/generate_dataset.py
+++ b/scripts/imitation_learning/isaaclab_mimic/generate_dataset.py
@@ -86,7 +86,10 @@ def main():
    # Setup output paths and get env name
    output_dir, output_file_name = setup_output_paths(args_cli.output_file)
-    env_name = args_cli.task or get_env_name_from_dataset(args_cli.input_file)
+    task_name = args_cli.task
+    if task_name:
+        task_name = args_cli.task.split(":")[-1]
+    env_name = task_name or get_env_name_from_dataset(args_cli.input_file)
    # Configure environment
    env_cfg, success_term = setup_env_config(

--- a/scripts/imitation_learning/robomimic/train.py
+++ b/scripts/imitation_learning/robomimic/train.py
@@ -359,15 +359,16 @@ def main(args: argparse.Namespace):
    if args.task is not None:
        # obtain the configuration entry point
        cfg_entry_point_key = f"robomimic_{args.algo}_cfg_entry_point"
+        task_name = args.task.split(":")[-1]
-        print(f"Loading configuration for task: {args.task}")
+        print(f"Loading configuration for task: {task_name}")
        print(gym.envs.registry.keys())
        print(" ")
-        cfg_entry_point_file = gym.spec(args.task).kwargs.pop(cfg_entry_point_key)
+        cfg_entry_point_file = gym.spec(task_name).kwargs.pop(cfg_entry_point_key)
        # check if entry point exists
        if cfg_entry_point_file is None:
            raise ValueError(
-                f"Could not find configuration for the environment: '{args.task}'."
+                f"Could not find configuration for the environment: '{task_name}'."
                f" Please check that the gym registry has the entry point: '{cfg_entry_point_key}'."
            )

--- a/scripts/reinforcement_learning/rl_games/play.py
+++ b/scripts/reinforcement_learning/rl_games/play.py
@@ -77,6 +77,7 @@ from isaaclab_tasks.utils import get_checkpoint_path, load_cfg_from_registry, pa
 def main():
    """Play with RL-Games agent."""
+    task_name = args_cli.task.split(":")[-1]
    # parse env configuration
    env_cfg = parse_env_cfg(
        args_cli.task, device=args_cli.device, num_envs=args_cli.num_envs, use_fabric=not args_cli.disable_fabric
@@ -89,7 +90,7 @@ def main():
    print(f"[INFO] Loading experiment from directory: {log_root_path}")
    # find checkpoint
    if args_cli.use_pretrained_checkpoint:
-        resume_path = get_published_pretrained_checkpoint("rl_games", args_cli.task)
+        resume_path = get_published_pretrained_checkpoint("rl_games", task_name)
        if not resume_path:
            print("[INFO] Unfortunately a pre-trained checkpoint is currently unavailable for this task.")
            return

--- a/scripts/reinforcement_learning/rsl_rl/play.py
+++ b/scripts/reinforcement_learning/rsl_rl/play.py
@@ -71,18 +71,19 @@ from isaaclab_tasks.utils import get_checkpoint_path, parse_env_cfg
 def main():
    """Play with RSL-RL agent."""
+    task_name = args_cli.task.split(":")[-1]
    # parse configuration
    env_cfg = parse_env_cfg(
        args_cli.task, device=args_cli.device, num_envs=args_cli.num_envs, use_fabric=not args_cli.disable_fabric
    )
-    agent_cfg: RslRlOnPolicyRunnerCfg = cli_args.parse_rsl_rl_cfg(args_cli.task, args_cli)
+    agent_cfg: RslRlOnPolicyRunnerCfg = cli_args.parse_rsl_rl_cfg(task_name, args_cli)
    # specify directory for logging experiments
    log_root_path = os.path.join("logs", "rsl_rl", agent_cfg.experiment_name)
    log_root_path = os.path.abspath(log_root_path)
    print(f"[INFO] Loading experiment from directory: {log_root_path}")
    if args_cli.use_pretrained_checkpoint:
-        resume_path = get_published_pretrained_checkpoint("rsl_rl", args_cli.task)
+        resume_path = get_published_pretrained_checkpoint("rsl_rl", task_name)
        if not resume_path:
            print("[INFO] Unfortunately a pre-trained checkpoint is currently unavailable for this task.")
            return

--- a/scripts/reinforcement_learning/sb3/play.py
+++ b/scripts/reinforcement_learning/sb3/play.py
@@ -80,12 +80,14 @@ def main():
    )
    agent_cfg = load_cfg_from_registry(args_cli.task, "sb3_cfg_entry_point")
+    task_name = args_cli.task.split(":")[-1]
    # directory for logging into
-    log_root_path = os.path.join("logs", "sb3", args_cli.task)
+    log_root_path = os.path.join("logs", "sb3", task_name)
    log_root_path = os.path.abspath(log_root_path)
    # checkpoint and log_dir stuff
    if args_cli.use_pretrained_checkpoint:
-        checkpoint_path = get_published_pretrained_checkpoint("sb3", args_cli.task)
+        checkpoint_path = get_published_pretrained_checkpoint("sb3", task_name)
        if not checkpoint_path:
            print("[INFO] Unfortunately a pre-trained checkpoint is currently unavailable for this task.")
            return

--- a/scripts/reinforcement_learning/skrl/play.py
+++ b/scripts/reinforcement_learning/skrl/play.py
@@ -108,14 +108,16 @@ def main():
    if args_cli.ml_framework.startswith("jax"):
        skrl.config.jax.backend = "jax" if args_cli.ml_framework == "jax" else "numpy"
+    task_name = args_cli.task.split(":")[-1]
    # parse configuration
    env_cfg = parse_env_cfg(
        args_cli.task, device=args_cli.device, num_envs=args_cli.num_envs, use_fabric=not args_cli.disable_fabric
    )
    try:
-        experiment_cfg = load_cfg_from_registry(args_cli.task, f"skrl_{algorithm}_cfg_entry_point")
+        experiment_cfg = load_cfg_from_registry(task_name, f"skrl_{algorithm}_cfg_entry_point")
    except ValueError:
-        experiment_cfg = load_cfg_from_registry(args_cli.task, "skrl_cfg_entry_point")
+        experiment_cfg = load_cfg_from_registry(task_name, "skrl_cfg_entry_point")
    # specify directory for logging experiments (load checkpoint)
    log_root_path = os.path.join("logs", "skrl", experiment_cfg["agent"]["experiment"]["directory"])
@@ -123,7 +125,7 @@ def main():
    print(f"[INFO] Loading experiment from directory: {log_root_path}")
    # get checkpoint path
    if args_cli.use_pretrained_checkpoint:
-        resume_path = get_published_pretrained_checkpoint("skrl", args_cli.task)
+        resume_path = get_published_pretrained_checkpoint("skrl", task_name)
        if not resume_path:
            print("[INFO] Unfortunately a pre-trained checkpoint is currently unavailable for this task.")
            return

--- a/scripts/tools/record_demos.py
+++ b/scripts/tools/record_demos.py
@@ -209,7 +209,7 @@ def main():
    # parse configuration
    env_cfg = parse_env_cfg(args_cli.task, device=args_cli.device, num_envs=1)
-    env_cfg.env_name = args_cli.task
+    env_cfg.env_name = args_cli.task.split(":")[-1]
    # extract success checking function to invoke in the main loop
    success_term = None

--- a/scripts/tools/replay_demos.py
+++ b/scripts/tools/replay_demos.py
@@ -140,7 +140,7 @@ def main():
        episode_indices_to_replay = list(range(episode_count))
    if args_cli.task is not None:
-        env_name = args_cli.task
+        env_name = args_cli.task.split(":")[-1]
    if env_name is None:
        raise ValueError("Task/env name was not specified nor found in the dataset.")
@@ -153,7 +153,7 @@ def main():
    env_cfg.terminations = {}
    # create environment from loaded config
-    env = gym.make(env_name, cfg=env_cfg).unwrapped
+    env = gym.make(args_cli.task, cfg=env_cfg).unwrapped
    teleop_interface = Se3Keyboard(pos_sensitivity=0.1, rot_sensitivity=0.1)
    teleop_interface.add_callback("N", play_cb)

--- a/source/isaaclab/config/extension.toml
+++ b/source/isaaclab/config/extension.toml
 [package]
 # Note: Semantic Versioning is used: https://semver.org/
-version = "0.40.1"
+version = "0.40.2"
 # Description
 title = "Isaac Lab framework for Robot Learning"

--- a/source/isaaclab/docs/CHANGELOG.rst
+++ b/source/isaaclab/docs/CHANGELOG.rst
 Changelog
 ---------
+0.40.2 (2025-06-06)
+~~~~~~~~~~~~~~~~~~~
+Added
+^^^^^
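+* Updated the minimum required gymnasium version to 1.0 (``gymnasium>=1.0``).
+* Added support for specifying ``module:task_name`` as the task name, so that ``gym.make`` imports the
+  task module automatically instead of requiring it to be imported beforehand.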
 0.40.1 (2025-06-02)
 ~~~~~~~~~~~~~~~~~~~
 Added
 ^^^^^
 * Added time observation functions to ~isaaclab.envs.mdp.observations module,
  :func:`~isaaclab.envs.mdp.observations.current_time_s` and :func:`~isaaclab.envs.mdp.observations.remaining_time_s`.
@@ -108,9 +119,6 @@ Fixed
 0.39.1 (2025-05-14)
 ~~~~~~~~~~~~~~~~~~~
-Added
-^^^^^
 * Added a new attribute :attr:`articulation_root_prim_path` to the :class:`~isaaclab.assets.ArticulationCfg` class
  to allow explicitly specifying the prim path of the articulation root.

--- a/source/isaaclab/setup.py
+++ b/source/isaaclab/setup.py
@@ -32,7 +32,7 @@ INSTALL_REQUIRES = [
    # devices
    "hidapi==0.14.0.post2",
    # reinforcement learning
-    "gymnasium",
+    "gymnasium>=1.0",
    # procedural-generation
    "trimesh",
    "pyglet<2",

--- a/source/isaaclab_tasks/isaaclab_tasks/utils/hydra.py
+++ b/source/isaaclab_tasks/isaaclab_tasks/utils/hydra.py
@@ -83,10 +83,10 @@ def hydra_task_config(task_name: str, agent_cfg_entry_point: str) -> Callable:
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            # register the task to Hydra
-            env_cfg, agent_cfg = register_task_to_hydra(task_name, agent_cfg_entry_point)
+            env_cfg, agent_cfg = register_task_to_hydra(task_name.split(":")[-1], agent_cfg_entry_point)
            # define the new Hydra main function
-            @hydra.main(config_path=None, config_name=task_name, version_base="1.3")
+            @hydra.main(config_path=None, config_name=task_name.split(":")[-1], version_base="1.3")
            def hydra_main(hydra_env_cfg: DictConfig, env_cfg=env_cfg, agent_cfg=agent_cfg):
                # convert to a native dictionary
                hydra_env_cfg = OmegaConf.to_container(hydra_env_cfg, resolve=True)

--- a/source/isaaclab_tasks/isaaclab_tasks/utils/parse_cfg.py
+++ b/source/isaaclab_tasks/isaaclab_tasks/utils/parse_cfg.py
@@ -57,7 +57,7 @@ def load_cfg_from_registry(task_name: str, entry_point_key: str) -> dict | objec
        ValueError: If the entry point key is not available in the gym registry for the task.
    """
    # obtain the configuration entry point
-    cfg_entry_point = gym.spec(task_name).kwargs.get(entry_point_key)
+    cfg_entry_point = gym.spec(task_name.split(":")[-1]).kwargs.get(entry_point_key)
    # check if entry point exists
    if cfg_entry_point is None:
        raise ValueError(
@@ -122,7 +122,7 @@ def parse_env_cfg(
            environment configuration.
    """
    # load the default configuration
-    cfg = load_cfg_from_registry(task_name, "env_cfg_entry_point")
+    cfg = load_cfg_from_registry(task_name.split(":")[-1], "env_cfg_entry_point")
    # check that it is not a dict
    # we assume users always use a class for the configuration