# SPDX-FileCopyrightText: Copyright (c) 2025-2026 The ProtoMotions Developers
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""
NV-SMPL Mimic on Isaac Lab Terrain
====================================

Mimic training for the NV-SMPL humanoid on terrain generated natively by
Isaac Lab at training time (no pre-export step). The robot spawns at the
exact root position encoded in the motion file (zero respawn offset).

Usage::

    python protomotions/train_agent.py \\
        --robot-name nv_smpl \\
        --simulator isaaclab \\
        --experiment-path examples/experiments/mimic/mlp_nvsmpl_terrain.py \\
        --motion-file <path/to/motion_file> \\
        --num-envs 64
"""

from protomotions.robot_configs.base import RobotConfig
from protomotions.simulator.base_simulator.config import SimulatorConfig
from protomotions.components.terrains.config import TerrainConfig
from protomotions.envs.base_env.config import EnvConfig
from protomotions.agents.ppo.config import PPOAgentConfig
from protomotions.components.scene_lib import SceneLibConfig
from protomotions.components.motion_lib import MotionLibConfig
import argparse


# --------------------------------------------------------------------------- #
# Terrain layout: one column per sub-terrain type, uniform proportions.
# Columns = (3 stair widths) x (up + down) + (slope up + slope down) = 8.
# --------------------------------------------------------------------------- #
_STAIR_WIDTHS_CM = [26, 27, 28]
_N_STAIR = len(_STAIR_WIDTHS_CM)
_N_COLS = _N_STAIR * 2 + 2
_PROP = 1.0 / _N_COLS  # equal share of the generator per sub-terrain

# Shared stair parameters; step height range spans ~18.0-21.5 cm.
_STAIR_COMMON = dict(
    step_height_range=(0.180, 0.215),
    platform_width=2.5,
    border_width=0.75,
    holes=False,
)

# Shared slope parameters; 0.3640 rad is roughly a 20-degree incline cap.
_SLOPE_COMMON = dict(
    proportion=_PROP,
    slope_range=(0.0, 0.3640),
    platform_width=2.0,
    border_width=0.25,
    resolution=(256, 256),
)

# Build the sub-terrain registry. Insertion order matters for column layout:
# stairs up (all widths), stairs down (all widths), slope up, slope down.
_sub_terrains_spec = {}
for _prefix, _target in (
    ("stairs_up", "MeshPyramidStairsTerrainCfg"),
    ("stairs_dn", "MeshInvertedPyramidStairsTerrainCfg"),
):
    for _w_cm in _STAIR_WIDTHS_CM:
        _sub_terrains_spec[f"{_prefix}_w{_w_cm}"] = {
            "_target_": _target,
            "proportion": _PROP,
            "step_width": _w_cm / 100.0,
            **_STAIR_COMMON,
        }
for _name, _target in (
    ("slope_up", "smooth_slope.MeshSmoothPyramidSlopeCfg"),
    ("slope_down", "smooth_slope.MeshSmoothInvertedPyramidSlopeCfg"),
):
    _sub_terrains_spec[_name] = {
        "_target_": _target,
        **_SLOPE_COMMON,
    }

# Spec handed to Isaac Lab's native terrain generator at sim startup.
TERRAIN_GENERATOR_SPEC = {
    "size": (32.0, 32.0),
    "border_width": 4.0,
    "num_rows": 8,
    "num_cols": _N_COLS,
    "horizontal_scale": 0.05,
    "vertical_scale": 0.005,
    "slope_threshold": None,
    "use_cache": False,
    "curriculum": True,
    "color_scheme": "height",
    "sub_terrains": _sub_terrains_spec,
}


def terrain_config(args: argparse.Namespace):
    """Build terrain configuration — Isaac Lab generates terrain natively.

    Dimensions here mirror TERRAIN_GENERATOR_SPEC (32 m tiles, 8x8 grid,
    4 m border) so ProtoMotions' terrain bookkeeping matches the generated
    mesh.
    """
    return TerrainConfig(
        use_isaaclab_generator=True,
        map_length=32.0,
        map_width=32.0,
        num_levels=8,
        num_terrains=8,
        horizontal_scale=0.05,
        vertical_scale=0.005,
        border_size=4.0,
        minimal_humanoid_spacing=0.0,
    )


def scene_lib_config(args: argparse.Namespace):
    """Build scene library configuration (no scenes unless provided)."""
    # getattr with a default replaces the hasattr/ternary dance.
    return SceneLibConfig(scene_file=getattr(args, "scenes_file", None))


def motion_lib_config(args: argparse.Namespace):
    """Build motion library configuration from the CLI-supplied motion file."""
    return MotionLibConfig(motion_file=args.motion_file)


def env_config(robot_cfg: RobotConfig, args: argparse.Namespace) -> EnvConfig:
    """Build environment configuration (training defaults).

    Mimic tracking setup: max-coordinate observations plus target poses,
    tracking-error termination, and a weighted sum of tracking/regularizer
    rewards.
    """
    from protomotions.envs.motion_manager.config import MimicMotionManagerConfig
    from protomotions.envs.control.mimic_control import MimicControlConfig
    from protomotions.envs.component_factories import (
        max_coords_obs_factory,
        previous_actions_factory,
        mimic_target_poses_max_coords_factory,
        action_smoothness_factory,
        mimic_tracking_rewards_factory,
        root_axis_rew_factory,
        pow_rew_factory,
        contact_match_rew_factory,
        tracking_error_term_factory,
        relative_body_pos_rew_factory,
        relative_body_ori_rew_factory,
    )
    from protomotions.envs.action import make_pd_action_config

    control_components = {
        "mimic": MimicControlConfig(
            bootstrap_on_episode_end=True,
        )
    }

    observation_components = {
        "max_coords_obs": max_coords_obs_factory(),
        "previous_actions": previous_actions_factory(history_steps=1),
        "mimic_target_poses": mimic_target_poses_max_coords_factory(
            with_velocities=True
        ),
    }

    termination_components = {
        # Episode ends when tracking error exceeds 0.5.
        "tracking_error": tracking_error_term_factory(threshold=0.5),
    }

    reward_components = {
        # Negative weights below are penalties.
        "action_smoothness": action_smoothness_factory(weight=-0.02),
        **mimic_tracking_rewards_factory(
            gt_weight=0.5,
            gr_weight=0.3,
            gv_weight=0.1,
            gav_weight=0.2,
            rh_weight=0.2,
            gt_coef=-5.0,
            gr_coef=-5.0,
            gv_coef=-0.5,
            gav_coef=-0.1,
            rh_coef=-100.0,
        ),
        "rx_rew": root_axis_rew_factory(axis=0, weight=0.2, coefficient=-100.0),
        "ry_rew": root_axis_rew_factory(axis=1, weight=0.2, coefficient=-100.0),
        "pow_rew": pow_rew_factory(weight=-1e-5, min_value=-0.5),
        "contact_match_rew": contact_match_rew_factory(
            weight=-0.1, zero_during_grace_period=True
        ),
        "rel_body_pos_rew": relative_body_pos_rew_factory(weight=1.0, sigma=0.3),
        "rel_body_ori_rew": relative_body_ori_rew_factory(weight=1.0, sigma=0.4),
    }

    return EnvConfig(
        ref_contact_smooth_window=7,
        max_episode_length=1000,
        num_state_history_steps=2,
        # Spawn at the root position encoded in the motion (zero offset).
        use_motion_root_position=True,
        control_components=control_components,
        observation_components=observation_components,
        termination_components=termination_components,
        reward_components=reward_components,
        action_config=make_pd_action_config(robot_cfg),
        motion_manager=MimicMotionManagerConfig(
            init_start_prob=1.0,
            resample_on_reset=True,
        ),
    )


def agent_config(
    robot_config: RobotConfig, env_config: EnvConfig, args: argparse.Namespace
) -> PPOAgentConfig:
    """Build agent configuration.

    PPO with MLP actor/critic over concatenated observation streams
    (proprioception, terrain heights, mimic targets, previous actions).
    """
    from protomotions.agents.common.config import MLPWithConcatConfig, MLPLayerConfig
    from protomotions.agents.ppo.config import (
        PPOActorConfig,
        PPOModelConfig,
        AdvantageNormalizationConfig,
    )
    from protomotions.agents.base_agent.config import OptimizerConfig
    from protomotions.agents.evaluators.config import (
        MimicEvaluatorConfig,
        MotionWeightsRulesConfig,
    )
    from protomotions.envs.component_factories import (
        gt_error_factory,
        gr_error_factory,
        max_joint_error_factory,
        relative_body_pos_rew_factory,
        relative_body_ori_rew_factory,
    )

    # All network heads consume the same concatenated observation set.
    obs_keys = ["max_coords_obs", "terrain", "mimic_target_poses", "previous_actions"]

    actor_config = PPOActorConfig(
        num_out=robot_config.kinematic_info.num_dofs,
        actor_logstd=-2.9,
        in_keys=list(obs_keys),
        mu_key="actor_trunk_out",
        mu_model=MLPWithConcatConfig(
            in_keys=list(obs_keys),
            normalize_obs=True,
            norm_clamp_value=5,
            out_keys=["actor_trunk_out"],
            num_out=robot_config.number_of_actions,
            layers=[MLPLayerConfig(units=1024, activation="relu") for _ in range(6)],
        ),
    )

    critic_config = MLPWithConcatConfig(
        in_keys=list(obs_keys),
        out_keys=["value"],
        normalize_obs=True,
        norm_clamp_value=5,
        num_out=1,
        layers=[MLPLayerConfig(units=1024, activation="relu") for _ in range(4)],
    )

    # Named `ppo_config` (not `agent_config`) to avoid shadowing this function.
    ppo_config: PPOAgentConfig = PPOAgentConfig(
        model=PPOModelConfig(
            in_keys=list(obs_keys),
            out_keys=["action", "mean_action", "neglogp", "value"],
            actor=actor_config,
            critic=critic_config,
            actor_optimizer=OptimizerConfig(_target_="torch.optim.Adam", lr=2e-5),
            critic_optimizer=OptimizerConfig(_target_="torch.optim.Adam", lr=1e-4),
        ),
        batch_size=args.batch_size,
        training_max_steps=args.training_max_steps,
        gradient_clip_val=50.0,
        clip_critic_loss=True,
        evaluator=MimicEvaluatorConfig(
            evaluation_components={
                "gt_error": gt_error_factory(threshold=0.5),
                "gr_error": gr_error_factory(),
                "max_joint_error": max_joint_error_factory(),
                "rel_body_pos_rew": relative_body_pos_rew_factory(
                    weight=1.0, sigma=0.3
                ),
                "rel_body_ori_rew": relative_body_ori_rew_factory(
                    weight=1.0, sigma=0.4
                ),
            },
            motion_weights_rules=MotionWeightsRulesConfig(
                motion_weights_update_success_discount=0.999,
                motion_weights_update_failure_discount=0,
            ),
        ),
        advantage_normalization=AdvantageNormalizationConfig(
            enabled=True, shift_mean=True, use_ema=True
        ),
    )
    return ppo_config


def configure_robot_and_simulator(
    robot_cfg: RobotConfig, simulator_cfg: SimulatorConfig, args: argparse.Namespace
):
    """Configure robot and simulator for terrain training.

    Restricts contact bodies to the feet and hands the terrain spec to the
    simulator when it supports native generation.
    """
    robot_cfg.update_fields(
        contact_bodies=["all_left_foot_bodies", "all_right_foot_bodies"]
    )
    # Only simulators exposing this field (Isaac Lab) receive the spec.
    if hasattr(simulator_cfg, "terrain_generator_spec"):
        simulator_cfg.terrain_generator_spec = TERRAIN_GENERATOR_SPEC


def apply_inference_overrides(
    robot_cfg: RobotConfig,
    simulator_cfg: SimulatorConfig,
    env_cfg,
    agent_cfg,
    terrain_cfg: TerrainConfig,
    motion_lib_cfg: MotionLibConfig,
    scene_lib_cfg: SceneLibConfig,
    args: argparse.Namespace,
):
    """Apply evaluation-specific overrides.

    Disables terminations and effectively removes the episode-length cap so
    evaluation rollouts run motions uninterrupted; always restarts motions
    from the beginning.
    """
    if hasattr(env_cfg, "termination_components") and env_cfg.termination_components:
        env_cfg.termination_components = {}
    env_cfg.max_episode_length = 1000000
    env_cfg.motion_manager.resample_on_reset = True
    env_cfg.motion_manager.init_start_prob = 1.0