| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| """ |
| NV-SMPL Mimic on Isaac Lab Terrain |
| ==================================== |
| |
| Mimic training for the NV-SMPL humanoid on terrain generated natively by |
| Isaac Lab at training time (no pre-export step). The robot spawns at |
| the exact root position encoded in the motion file (zero respawn offset). |
| |
| Usage:: |
| |
| python protomotions/train_agent.py \\ |
| --robot-name nv_smpl \\ |
| --simulator isaaclab \\ |
| --experiment-path examples/experiments/mimic/mlp_nvsmpl_terrain.py \\ |
| --motion-file <path_to_motion_pt> \\ |
| --num-envs 64 |
| """ |
| from protomotions.robot_configs.base import RobotConfig |
| from protomotions.simulator.base_simulator.config import SimulatorConfig |
| from protomotions.components.terrains.config import TerrainConfig |
| from protomotions.envs.base_env.config import EnvConfig |
| from protomotions.agents.ppo.config import PPOAgentConfig |
| from protomotions.components.scene_lib import SceneLibConfig |
| from protomotions.components.motion_lib import MotionLibConfig |
| import argparse |
|
|
| _STAIR_WIDTHS_CM = [26, 27, 28] |
| _N_STAIR = len(_STAIR_WIDTHS_CM) |
| _N_COLS = _N_STAIR * 2 + 2 |
| _PROP = 1.0 / _N_COLS |
|
|
| _STAIR_COMMON = dict( |
| step_height_range=(0.180, 0.215), |
| platform_width=2.5, |
| border_width=0.75, |
| holes=False, |
| ) |
|
|
| _sub_terrains_spec = {} |
| for _w_cm in _STAIR_WIDTHS_CM: |
| _sub_terrains_spec[f"stairs_up_w{_w_cm}"] = { |
| "_target_": "MeshPyramidStairsTerrainCfg", |
| "proportion": _PROP, "step_width": _w_cm / 100.0, **_STAIR_COMMON, |
| } |
| for _w_cm in _STAIR_WIDTHS_CM: |
| _sub_terrains_spec[f"stairs_dn_w{_w_cm}"] = { |
| "_target_": "MeshInvertedPyramidStairsTerrainCfg", |
| "proportion": _PROP, "step_width": _w_cm / 100.0, **_STAIR_COMMON, |
| } |
| _sub_terrains_spec["slope_up"] = { |
| "_target_": "smooth_slope.MeshSmoothPyramidSlopeCfg", |
| "proportion": _PROP, "slope_range": (0.0, 0.3640), |
| "platform_width": 2.0, "border_width": 0.25, "resolution": (256, 256), |
| } |
| _sub_terrains_spec["slope_down"] = { |
| "_target_": "smooth_slope.MeshSmoothInvertedPyramidSlopeCfg", |
| "proportion": _PROP, "slope_range": (0.0, 0.3640), |
| "platform_width": 2.0, "border_width": 0.25, "resolution": (256, 256), |
| } |
|
|
| TERRAIN_GENERATOR_SPEC = { |
| "size": (32.0, 32.0), |
| "border_width": 4.0, |
| "num_rows": 8, |
| "num_cols": _N_COLS, |
| "horizontal_scale": 0.05, |
| "vertical_scale": 0.005, |
| "slope_threshold": None, |
| "use_cache": False, |
| "curriculum": True, |
| "color_scheme": "height", |
| "sub_terrains": _sub_terrains_spec, |
| } |
|
|
|
|
| def terrain_config(args: argparse.Namespace): |
| """Build terrain configuration — Isaac Lab generates terrain natively.""" |
| return TerrainConfig( |
| use_isaaclab_generator=True, |
| map_length=32.0, |
| map_width=32.0, |
| num_levels=8, |
| num_terrains=8, |
| horizontal_scale=0.05, |
| vertical_scale=0.005, |
| border_size=4.0, |
| minimal_humanoid_spacing=0.0, |
| ) |
|
|
|
|
| def scene_lib_config(args: argparse.Namespace): |
| """Build scene library configuration.""" |
| scene_file = args.scenes_file if hasattr(args, "scenes_file") else None |
| return SceneLibConfig(scene_file=scene_file) |
|
|
|
|
| def motion_lib_config(args: argparse.Namespace): |
| """Build motion library configuration.""" |
| return MotionLibConfig(motion_file=args.motion_file) |
|
|
|
|
| def env_config(robot_cfg: RobotConfig, args: argparse.Namespace) -> EnvConfig: |
| """Build environment configuration (training defaults).""" |
| from protomotions.envs.motion_manager.config import MimicMotionManagerConfig |
| from protomotions.envs.control.mimic_control import MimicControlConfig |
| from protomotions.envs.component_factories import ( |
| max_coords_obs_factory, |
| previous_actions_factory, |
| mimic_target_poses_max_coords_factory, |
| action_smoothness_factory, |
| mimic_tracking_rewards_factory, |
| root_axis_rew_factory, |
| pow_rew_factory, |
| contact_match_rew_factory, |
| tracking_error_term_factory, |
| relative_body_pos_rew_factory, |
| relative_body_ori_rew_factory, |
| ) |
| from protomotions.envs.action import make_pd_action_config |
|
|
| control_components = { |
| "mimic": MimicControlConfig( |
| bootstrap_on_episode_end=True, |
| ) |
| } |
|
|
| observation_components = { |
| "max_coords_obs": max_coords_obs_factory(), |
| "previous_actions": previous_actions_factory(history_steps=1), |
| "mimic_target_poses": mimic_target_poses_max_coords_factory(with_velocities=True), |
| } |
|
|
| termination_components = { |
| "tracking_error": tracking_error_term_factory(threshold=0.5), |
| } |
|
|
| reward_components = { |
| "action_smoothness": action_smoothness_factory(weight=-0.02), |
| **mimic_tracking_rewards_factory( |
| gt_weight=0.5, |
| gr_weight=0.3, |
| gv_weight=0.1, |
| gav_weight=0.2, |
| rh_weight=0.2, |
| gt_coef=-5.0, |
| gr_coef=-5.0, |
| gv_coef=-0.5, |
| gav_coef=-0.1, |
| rh_coef=-100.0, |
| ), |
| "rx_rew": root_axis_rew_factory(axis=0, weight=0.2, coefficient=-100.0), |
| "ry_rew": root_axis_rew_factory(axis=1, weight=0.2, coefficient=-100.0), |
| "pow_rew": pow_rew_factory(weight=-1e-5, min_value=-0.5), |
| "contact_match_rew": contact_match_rew_factory( |
| weight=-0.1, zero_during_grace_period=True |
| ), |
| "rel_body_pos_rew": relative_body_pos_rew_factory(weight=1.0, sigma=0.3), |
| "rel_body_ori_rew": relative_body_ori_rew_factory(weight=1.0, sigma=0.4), |
| } |
|
|
| return EnvConfig( |
| ref_contact_smooth_window=7, |
| max_episode_length=1000, |
| num_state_history_steps=2, |
| use_motion_root_position=True, |
| control_components=control_components, |
| observation_components=observation_components, |
| termination_components=termination_components, |
| reward_components=reward_components, |
| action_config=make_pd_action_config(robot_cfg), |
| motion_manager=MimicMotionManagerConfig( |
| init_start_prob=1.0, |
| resample_on_reset=True, |
| ), |
| ) |
|
|
|
|
| def agent_config( |
| robot_config: RobotConfig, env_config: EnvConfig, args: argparse.Namespace |
| ) -> PPOAgentConfig: |
| """Build agent configuration.""" |
| from protomotions.agents.common.config import MLPWithConcatConfig, MLPLayerConfig |
| from protomotions.agents.ppo.config import ( |
| PPOActorConfig, |
| PPOModelConfig, |
| AdvantageNormalizationConfig, |
| ) |
| from protomotions.agents.base_agent.config import OptimizerConfig |
| from protomotions.agents.evaluators.config import ( |
| MimicEvaluatorConfig, |
| MotionWeightsRulesConfig, |
| ) |
| from protomotions.envs.component_factories import ( |
| gt_error_factory, |
| gr_error_factory, |
| max_joint_error_factory, |
| relative_body_pos_rew_factory, |
| relative_body_ori_rew_factory, |
| ) |
|
|
| actor_config = PPOActorConfig( |
| num_out=robot_config.kinematic_info.num_dofs, |
| actor_logstd=-2.9, |
| in_keys=["max_coords_obs", "terrain", "mimic_target_poses", "previous_actions"], |
| mu_key="actor_trunk_out", |
| mu_model=MLPWithConcatConfig( |
| in_keys=[ |
| "max_coords_obs", |
| "terrain", |
| "mimic_target_poses", |
| "previous_actions", |
| ], |
| normalize_obs=True, |
| norm_clamp_value=5, |
| out_keys=["actor_trunk_out"], |
| num_out=robot_config.number_of_actions, |
| layers=[MLPLayerConfig(units=1024, activation="relu") for _ in range(6)], |
| ), |
| ) |
|
|
| critic_config = MLPWithConcatConfig( |
| in_keys=["max_coords_obs", "terrain", "mimic_target_poses", "previous_actions"], |
| out_keys=["value"], |
| normalize_obs=True, |
| norm_clamp_value=5, |
| num_out=1, |
| layers=[MLPLayerConfig(units=1024, activation="relu") for _ in range(4)], |
| ) |
|
|
| agent_config: PPOAgentConfig = PPOAgentConfig( |
| model=PPOModelConfig( |
| in_keys=[ |
| "max_coords_obs", |
| "terrain", |
| "mimic_target_poses", |
| "previous_actions", |
| ], |
| out_keys=["action", "mean_action", "neglogp", "value"], |
| actor=actor_config, |
| critic=critic_config, |
| actor_optimizer=OptimizerConfig(_target_="torch.optim.Adam", lr=2e-5), |
| critic_optimizer=OptimizerConfig(_target_="torch.optim.Adam", lr=1e-4), |
| ), |
| batch_size=args.batch_size, |
| training_max_steps=args.training_max_steps, |
| gradient_clip_val=50.0, |
| clip_critic_loss=True, |
| evaluator=MimicEvaluatorConfig( |
| evaluation_components={ |
| "gt_error": gt_error_factory(threshold=0.5), |
| "gr_error": gr_error_factory(), |
| "max_joint_error": max_joint_error_factory(), |
| "rel_body_pos_rew": relative_body_pos_rew_factory( |
| weight=1.0, sigma=0.3 |
| ), |
| "rel_body_ori_rew": relative_body_ori_rew_factory( |
| weight=1.0, sigma=0.4 |
| ), |
| }, |
| motion_weights_rules=MotionWeightsRulesConfig( |
| motion_weights_update_success_discount=0.999, |
| motion_weights_update_failure_discount=0, |
| ), |
| ), |
| advantage_normalization=AdvantageNormalizationConfig( |
| enabled=True, shift_mean=True, use_ema=True |
| ), |
| ) |
| return agent_config |
|
|
|
|
| def configure_robot_and_simulator( |
| robot_cfg: RobotConfig, simulator_cfg: SimulatorConfig, args: argparse.Namespace |
| ): |
| """Configure robot and simulator for terrain training.""" |
| robot_cfg.update_fields( |
| contact_bodies=["all_left_foot_bodies", "all_right_foot_bodies"] |
| ) |
| if hasattr(simulator_cfg, "terrain_generator_spec"): |
| simulator_cfg.terrain_generator_spec = TERRAIN_GENERATOR_SPEC |
|
|
|
|
| def apply_inference_overrides( |
| robot_cfg: RobotConfig, |
| simulator_cfg: SimulatorConfig, |
| env_cfg, |
| agent_cfg, |
| terrain_cfg: TerrainConfig, |
| motion_lib_cfg: MotionLibConfig, |
| scene_lib_cfg: SceneLibConfig, |
| args: argparse.Namespace, |
| ): |
| """Apply evaluation-specific overrides.""" |
| if hasattr(env_cfg, "termination_components") and env_cfg.termination_components: |
| env_cfg.termination_components = {} |
|
|
| env_cfg.max_episode_length = 1000000 |
| env_cfg.motion_manager.resample_on_reset = True |
| env_cfg.motion_manager.init_start_prob = 1.0 |
|
|