# SPDX-FileCopyrightText: Copyright (c) 2025-2026 The ProtoMotions Developers
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""
NV-SMPL Mimic on Isaac Lab Terrain
====================================
Mimic training for the NV-SMPL humanoid on terrain generated natively by
Isaac Lab at training time (no pre-export step). The robot spawns at
the exact root position encoded in the motion file (zero respawn offset).
Usage::
python protomotions/train_agent.py \\
--robot-name nv_smpl \\
--simulator isaaclab \\
--experiment-path examples/experiments/mimic/mlp_nvsmpl_terrain.py \\
--motion-file <path_to_motion_pt> \\
--num-envs 64
"""
import argparse

from protomotions.robot_configs.base import RobotConfig
from protomotions.simulator.base_simulator.config import SimulatorConfig
from protomotions.components.terrains.config import TerrainConfig
from protomotions.envs.base_env.config import EnvConfig
from protomotions.agents.ppo.config import PPOAgentConfig
from protomotions.components.scene_lib import SceneLibConfig
from protomotions.components.motion_lib import MotionLibConfig
_STAIR_WIDTHS_CM = [26, 27, 28]
_N_STAIR = len(_STAIR_WIDTHS_CM)
_N_COLS = _N_STAIR * 2 + 2
_PROP = 1.0 / _N_COLS
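# Layout bookkeeping: 3 stair widths x 2 directions (up/down) plus the two smooth
# slopes give _N_COLS = 3 * 2 + 2 = 8 sub-terrain types, each assigned an equal
# proportion _PROP = 1 / 8 = 0.125 of the generated terrain columns.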
_STAIR_COMMON = dict(
    step_height_range=(0.180, 0.215),
    platform_width=2.5,
    border_width=0.75,
    holes=False,
)

_sub_terrains_spec = {}
for _w_cm in _STAIR_WIDTHS_CM:
    _sub_terrains_spec[f"stairs_up_w{_w_cm}"] = {
        "_target_": "MeshPyramidStairsTerrainCfg",
        "proportion": _PROP, "step_width": _w_cm / 100.0, **_STAIR_COMMON,
    }
for _w_cm in _STAIR_WIDTHS_CM:
    _sub_terrains_spec[f"stairs_dn_w{_w_cm}"] = {
        "_target_": "MeshInvertedPyramidStairsTerrainCfg",
        "proportion": _PROP, "step_width": _w_cm / 100.0, **_STAIR_COMMON,
    }
_sub_terrains_spec["slope_up"] = {
    "_target_": "smooth_slope.MeshSmoothPyramidSlopeCfg",
    "proportion": _PROP, "slope_range": (0.0, 0.3640),
    "platform_width": 2.0, "border_width": 0.25, "resolution": (256, 256),
}
_sub_terrains_spec["slope_down"] = {
    "_target_": "smooth_slope.MeshSmoothInvertedPyramidSlopeCfg",
    "proportion": _PROP, "slope_range": (0.0, 0.3640),
    "platform_width": 2.0, "border_width": 0.25, "resolution": (256, 256),
}

TERRAIN_GENERATOR_SPEC = {
    "size": (32.0, 32.0),
    "border_width": 4.0,
    "num_rows": 8,
    "num_cols": _N_COLS,
    "horizontal_scale": 0.05,
    "vertical_scale": 0.005,
    "slope_threshold": None,
    "use_cache": False,
    "curriculum": True,
    "color_scheme": "height",
    "sub_terrains": _sub_terrains_spec,
}
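# With the spec above, the generator should yield an 8 x 8 grid of 32 m x 32 m tiles
# inside a 4 m border: one column per sub-terrain type and, with curriculum=True,
# difficulty increasing along the rows. That row/column semantics is Isaac Lab's
# convention for its terrain generator, not something enforced by this file.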
def terrain_config(args: argparse.Namespace):
    """Build terrain configuration — Isaac Lab generates terrain natively."""
    return TerrainConfig(
        use_isaaclab_generator=True,
        map_length=32.0,
        map_width=32.0,
        num_levels=8,
        num_terrains=8,
        horizontal_scale=0.05,
        vertical_scale=0.005,
        border_size=4.0,
        minimal_humanoid_spacing=0.0,
    )
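# Note: map_length/map_width, num_levels/num_terrains and border_size mirror the
# size, num_rows/num_cols and border_width of TERRAIN_GENERATOR_SPEC so the two
# descriptions stay consistent; presumably TerrainConfig only needs these coarse
# dimensions while the full spec is attached to the simulator config further below.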
def scene_lib_config(args: argparse.Namespace):
    """Build scene library configuration."""
    scene_file = getattr(args, "scenes_file", None)
    return SceneLibConfig(scene_file=scene_file)


def motion_lib_config(args: argparse.Namespace):
    """Build motion library configuration."""
    return MotionLibConfig(motion_file=args.motion_file)
def env_config(robot_cfg: RobotConfig, args: argparse.Namespace) -> EnvConfig:
    """Build environment configuration (training defaults)."""
    from protomotions.envs.motion_manager.config import MimicMotionManagerConfig
    from protomotions.envs.control.mimic_control import MimicControlConfig
    from protomotions.envs.component_factories import (
        max_coords_obs_factory,
        previous_actions_factory,
        mimic_target_poses_max_coords_factory,
        action_smoothness_factory,
        mimic_tracking_rewards_factory,
        root_axis_rew_factory,
        pow_rew_factory,
        contact_match_rew_factory,
        tracking_error_term_factory,
        relative_body_pos_rew_factory,
        relative_body_ori_rew_factory,
    )
    from protomotions.envs.action import make_pd_action_config

    control_components = {
        "mimic": MimicControlConfig(
            bootstrap_on_episode_end=True,
        )
    }
    observation_components = {
        "max_coords_obs": max_coords_obs_factory(),
        "previous_actions": previous_actions_factory(history_steps=1),
        "mimic_target_poses": mimic_target_poses_max_coords_factory(with_velocities=True),
    }
    termination_components = {
        "tracking_error": tracking_error_term_factory(threshold=0.5),
    }
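    # Reward shorthand, as interpreted here: the gt/gr weights presumably track global
    # body translation/rotation, gv/gav global linear/angular velocity, and rh root
    # height, with each *_coef setting the sharpness of the corresponding error
    # kernel. The exact kernel shape is defined by mimic_tracking_rewards_factory.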
    reward_components = {
        "action_smoothness": action_smoothness_factory(weight=-0.02),
        **mimic_tracking_rewards_factory(
            gt_weight=0.5,
            gr_weight=0.3,
            gv_weight=0.1,
            gav_weight=0.2,
            rh_weight=0.2,
            gt_coef=-5.0,
            gr_coef=-5.0,
            gv_coef=-0.5,
            gav_coef=-0.1,
            rh_coef=-100.0,
        ),
        "rx_rew": root_axis_rew_factory(axis=0, weight=0.2, coefficient=-100.0),
        "ry_rew": root_axis_rew_factory(axis=1, weight=0.2, coefficient=-100.0),
        "pow_rew": pow_rew_factory(weight=-1e-5, min_value=-0.5),
        "contact_match_rew": contact_match_rew_factory(
            weight=-0.1, zero_during_grace_period=True
        ),
        "rel_body_pos_rew": relative_body_pos_rew_factory(weight=1.0, sigma=0.3),
        "rel_body_ori_rew": relative_body_ori_rew_factory(weight=1.0, sigma=0.4),
    }

    return EnvConfig(
        ref_contact_smooth_window=7,
        max_episode_length=1000,
        num_state_history_steps=2,
        use_motion_root_position=True,
        control_components=control_components,
        observation_components=observation_components,
        termination_components=termination_components,
        reward_components=reward_components,
        action_config=make_pd_action_config(robot_cfg),
        motion_manager=MimicMotionManagerConfig(
            init_start_prob=1.0,
            resample_on_reset=True,
        ),
    )
def agent_config(
    robot_config: RobotConfig, env_config: EnvConfig, args: argparse.Namespace
) -> PPOAgentConfig:
    """Build agent configuration."""
    from protomotions.agents.common.config import MLPWithConcatConfig, MLPLayerConfig
    from protomotions.agents.ppo.config import (
        PPOActorConfig,
        PPOModelConfig,
        AdvantageNormalizationConfig,
    )
    from protomotions.agents.base_agent.config import OptimizerConfig
    from protomotions.agents.evaluators.config import (
        MimicEvaluatorConfig,
        MotionWeightsRulesConfig,
    )
    from protomotions.envs.component_factories import (
        gt_error_factory,
        gr_error_factory,
        max_joint_error_factory,
        relative_body_pos_rew_factory,
        relative_body_ori_rew_factory,
    )

    actor_config = PPOActorConfig(
        num_out=robot_config.kinematic_info.num_dofs,
        actor_logstd=-2.9,
        in_keys=["max_coords_obs", "terrain", "mimic_target_poses", "previous_actions"],
        mu_key="actor_trunk_out",
        mu_model=MLPWithConcatConfig(
            in_keys=[
                "max_coords_obs",
                "terrain",
                "mimic_target_poses",
                "previous_actions",
            ],
            normalize_obs=True,
            norm_clamp_value=5,
            out_keys=["actor_trunk_out"],
            num_out=robot_config.number_of_actions,
            layers=[MLPLayerConfig(units=1024, activation="relu") for _ in range(6)],
        ),
    )
    critic_config = MLPWithConcatConfig(
        in_keys=["max_coords_obs", "terrain", "mimic_target_poses", "previous_actions"],
        out_keys=["value"],
        normalize_obs=True,
        norm_clamp_value=5,
        num_out=1,
        layers=[MLPLayerConfig(units=1024, activation="relu") for _ in range(4)],
    )
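    # Actor and critic consume the same observation keys (including the "terrain"
    # heightmap) but differ in capacity: 6 vs. 4 MLP layers of 1024 units, trained
    # with separate Adam optimizers below (actor lr 2e-5, critic lr 1e-4).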
    agent_config: PPOAgentConfig = PPOAgentConfig(
        model=PPOModelConfig(
            in_keys=[
                "max_coords_obs",
                "terrain",
                "mimic_target_poses",
                "previous_actions",
            ],
            out_keys=["action", "mean_action", "neglogp", "value"],
            actor=actor_config,
            critic=critic_config,
            actor_optimizer=OptimizerConfig(_target_="torch.optim.Adam", lr=2e-5),
            critic_optimizer=OptimizerConfig(_target_="torch.optim.Adam", lr=1e-4),
        ),
        batch_size=args.batch_size,
        training_max_steps=args.training_max_steps,
        gradient_clip_val=50.0,
        clip_critic_loss=True,
        evaluator=MimicEvaluatorConfig(
            evaluation_components={
                "gt_error": gt_error_factory(threshold=0.5),
                "gr_error": gr_error_factory(),
                "max_joint_error": max_joint_error_factory(),
                "rel_body_pos_rew": relative_body_pos_rew_factory(
                    weight=1.0, sigma=0.3
                ),
                "rel_body_ori_rew": relative_body_ori_rew_factory(
                    weight=1.0, sigma=0.4
                ),
            },
            motion_weights_rules=MotionWeightsRulesConfig(
                motion_weights_update_success_discount=0.999,
                motion_weights_update_failure_discount=0,
            ),
        ),
        advantage_normalization=AdvantageNormalizationConfig(
            enabled=True, shift_mean=True, use_ema=True
        ),
    )
    return agent_config
def configure_robot_and_simulator(
    robot_cfg: RobotConfig, simulator_cfg: SimulatorConfig, args: argparse.Namespace
):
    """Configure robot and simulator for terrain training."""
    robot_cfg.update_fields(
        contact_bodies=["all_left_foot_bodies", "all_right_foot_bodies"]
    )
    if hasattr(simulator_cfg, "terrain_generator_spec"):
        simulator_cfg.terrain_generator_spec = TERRAIN_GENERATOR_SPEC


def apply_inference_overrides(
    robot_cfg: RobotConfig,
    simulator_cfg: SimulatorConfig,
    env_cfg,
    agent_cfg,
    terrain_cfg: TerrainConfig,
    motion_lib_cfg: MotionLibConfig,
    scene_lib_cfg: SceneLibConfig,
    args: argparse.Namespace,
):
    """Apply evaluation-specific overrides."""
    if hasattr(env_cfg, "termination_components") and env_cfg.termination_components:
        env_cfg.termination_components = {}
    env_cfg.max_episode_length = 1000000
    env_cfg.motion_manager.resample_on_reset = True
    env_cfg.motion_manager.init_start_prob = 1.0