# SPDX-FileCopyrightText: Copyright (c) 2025-2026 The ProtoMotions Developers
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""
NV-SMPL Mimic on Isaac Lab Terrain
====================================
Mimic training for the NV-SMPL humanoid on terrain generated natively by
Isaac Lab at training time (no pre-export step). The robot spawns at
the exact root position encoded in the motion file (zero respawn offset).
Usage::
python protomotions/train_agent.py \\
--robot-name nv_smpl \\
--simulator isaaclab \\
--experiment-path examples/experiments/mimic/mlp_nvsmpl_terrain.py \\
--motion-file <path_to_motion_pt> \\
--num-envs 64
"""
import argparse

from protomotions.robot_configs.base import RobotConfig
from protomotions.simulator.base_simulator.config import SimulatorConfig
from protomotions.components.terrains.config import TerrainConfig
from protomotions.envs.base_env.config import EnvConfig
from protomotions.agents.ppo.config import PPOAgentConfig
from protomotions.components.scene_lib import SceneLibConfig
from protomotions.components.motion_lib import MotionLibConfig
_STAIR_WIDTHS_CM = [26, 27, 28]
_N_STAIR = len(_STAIR_WIDTHS_CM)
_N_COLS = _N_STAIR * 2 + 2
_PROP = 1.0 / _N_COLS
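# Layout bookkeeping: 3 stair widths x 2 directions (up/down) plus the two smooth
# slopes give _N_COLS = 3 * 2 + 2 = 8 sub-terrain types, each assigned an equal
# proportion _PROP = 1 / 8 = 0.125 of the generated terrain columns.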
_STAIR_COMMON = dict(
    step_height_range=(0.180, 0.215),
    platform_width=2.5,
    border_width=0.75,
    holes=False,
)

_sub_terrains_spec = {}
for _w_cm in _STAIR_WIDTHS_CM:
    _sub_terrains_spec[f"stairs_up_w{_w_cm}"] = {
        "_target_": "MeshPyramidStairsTerrainCfg",
        "proportion": _PROP, "step_width": _w_cm / 100.0, **_STAIR_COMMON,
    }
for _w_cm in _STAIR_WIDTHS_CM:
    _sub_terrains_spec[f"stairs_dn_w{_w_cm}"] = {
        "_target_": "MeshInvertedPyramidStairsTerrainCfg",
        "proportion": _PROP, "step_width": _w_cm / 100.0, **_STAIR_COMMON,
    }
_sub_terrains_spec["slope_up"] = {
    "_target_": "smooth_slope.MeshSmoothPyramidSlopeCfg",
    "proportion": _PROP, "slope_range": (0.0, 0.3640),
    "platform_width": 2.0, "border_width": 0.25, "resolution": (256, 256),
}
_sub_terrains_spec["slope_down"] = {
    "_target_": "smooth_slope.MeshSmoothInvertedPyramidSlopeCfg",
    "proportion": _PROP, "slope_range": (0.0, 0.3640),
    "platform_width": 2.0, "border_width": 0.25, "resolution": (256, 256),
}

TERRAIN_GENERATOR_SPEC = {
    "size": (32.0, 32.0),
    "border_width": 4.0,
    "num_rows": 8,
    "num_cols": _N_COLS,
    "horizontal_scale": 0.05,
    "vertical_scale": 0.005,
    "slope_threshold": None,
    "use_cache": False,
    "curriculum": True,
    "color_scheme": "height",
    "sub_terrains": _sub_terrains_spec,
}
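# With the spec above, the generator should yield an 8 x 8 grid of 32 m x 32 m tiles
# inside a 4 m border: one column per sub-terrain type and, with curriculum=True,
# difficulty increasing along the rows. That row/column semantics is Isaac Lab's
# convention for its terrain generator, not something enforced by this file.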
def terrain_config(args: argparse.Namespace):
    """Build terrain configuration — Isaac Lab generates terrain natively."""
    return TerrainConfig(
        use_isaaclab_generator=True,
        map_length=32.0,
        map_width=32.0,
        num_levels=8,
        num_terrains=8,
        horizontal_scale=0.05,
        vertical_scale=0.005,
        border_size=4.0,
        minimal_humanoid_spacing=0.0,
    )
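# Note: map_length/map_width, num_levels/num_terrains and border_size mirror the
# size, num_rows/num_cols and border_width of TERRAIN_GENERATOR_SPEC so the two
# descriptions stay consistent; presumably TerrainConfig only needs these coarse
# dimensions while the full spec is attached to the simulator config further below.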
def scene_lib_config(args: argparse.Namespace):
    """Build scene library configuration."""
    scene_file = getattr(args, "scenes_file", None)
    return SceneLibConfig(scene_file=scene_file)


def motion_lib_config(args: argparse.Namespace):
    """Build motion library configuration."""
    return MotionLibConfig(motion_file=args.motion_file)
def env_config(robot_cfg: RobotConfig, args: argparse.Namespace) -> EnvConfig:
    """Build environment configuration (training defaults)."""
    from protomotions.envs.motion_manager.config import MimicMotionManagerConfig
    from protomotions.envs.control.mimic_control import MimicControlConfig
    from protomotions.envs.component_factories import (
        max_coords_obs_factory,
        previous_actions_factory,
        mimic_target_poses_max_coords_factory,
        action_smoothness_factory,
        mimic_tracking_rewards_factory,
        root_axis_rew_factory,
        pow_rew_factory,
        contact_match_rew_factory,
        tracking_error_term_factory,
        relative_body_pos_rew_factory,
        relative_body_ori_rew_factory,
    )
    from protomotions.envs.action import make_pd_action_config

    control_components = {
        "mimic": MimicControlConfig(
            bootstrap_on_episode_end=True,
        )
    }
    observation_components = {
        "max_coords_obs": max_coords_obs_factory(),
        "previous_actions": previous_actions_factory(history_steps=1),
        "mimic_target_poses": mimic_target_poses_max_coords_factory(with_velocities=True),
    }
    termination_components = {
        "tracking_error": tracking_error_term_factory(threshold=0.5),
    }
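    # Reward shorthand, as interpreted here: the gt/gr weights presumably track global
    # body translation/rotation, gv/gav global linear/angular velocity, and rh root
    # height, with each *_coef setting the sharpness of the corresponding error
    # kernel. The exact kernel shape is defined by mimic_tracking_rewards_factory.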
    reward_components = {
        "action_smoothness": action_smoothness_factory(weight=-0.02),
        **mimic_tracking_rewards_factory(
            gt_weight=0.5,
            gr_weight=0.3,
            gv_weight=0.1,
            gav_weight=0.2,
            rh_weight=0.2,
            gt_coef=-5.0,
            gr_coef=-5.0,
            gv_coef=-0.5,
            gav_coef=-0.1,
            rh_coef=-100.0,
        ),
        "rx_rew": root_axis_rew_factory(axis=0, weight=0.2, coefficient=-100.0),
        "ry_rew": root_axis_rew_factory(axis=1, weight=0.2, coefficient=-100.0),
        "pow_rew": pow_rew_factory(weight=-1e-5, min_value=-0.5),
        "contact_match_rew": contact_match_rew_factory(
            weight=-0.1, zero_during_grace_period=True
        ),
        "rel_body_pos_rew": relative_body_pos_rew_factory(weight=1.0, sigma=0.3),
        "rel_body_ori_rew": relative_body_ori_rew_factory(weight=1.0, sigma=0.4),
    }

    return EnvConfig(
        ref_contact_smooth_window=7,
        max_episode_length=1000,
        num_state_history_steps=2,
        use_motion_root_position=True,
        control_components=control_components,
        observation_components=observation_components,
        termination_components=termination_components,
        reward_components=reward_components,
        action_config=make_pd_action_config(robot_cfg),
        motion_manager=MimicMotionManagerConfig(
            init_start_prob=1.0,
            resample_on_reset=True,
        ),
    )
def agent_config(
    robot_config: RobotConfig, env_config: EnvConfig, args: argparse.Namespace
) -> PPOAgentConfig:
    """Build agent configuration."""
    from protomotions.agents.common.config import MLPWithConcatConfig, MLPLayerConfig
    from protomotions.agents.ppo.config import (
        PPOActorConfig,
        PPOModelConfig,
        AdvantageNormalizationConfig,
    )
    from protomotions.agents.base_agent.config import OptimizerConfig
    from protomotions.agents.evaluators.config import (
        MimicEvaluatorConfig,
        MotionWeightsRulesConfig,
    )
    from protomotions.envs.component_factories import (
        gt_error_factory,
        gr_error_factory,
        max_joint_error_factory,
        relative_body_pos_rew_factory,
        relative_body_ori_rew_factory,
    )

    actor_config = PPOActorConfig(
        num_out=robot_config.kinematic_info.num_dofs,
        actor_logstd=-2.9,
        in_keys=["max_coords_obs", "terrain", "mimic_target_poses", "previous_actions"],
        mu_key="actor_trunk_out",
        mu_model=MLPWithConcatConfig(
            in_keys=[
                "max_coords_obs",
                "terrain",
                "mimic_target_poses",
                "previous_actions",
            ],
            normalize_obs=True,
            norm_clamp_value=5,
            out_keys=["actor_trunk_out"],
            num_out=robot_config.number_of_actions,
            layers=[MLPLayerConfig(units=1024, activation="relu") for _ in range(6)],
        ),
    )
    critic_config = MLPWithConcatConfig(
        in_keys=["max_coords_obs", "terrain", "mimic_target_poses", "previous_actions"],
        out_keys=["value"],
        normalize_obs=True,
        norm_clamp_value=5,
        num_out=1,
        layers=[MLPLayerConfig(units=1024, activation="relu") for _ in range(4)],
    )
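    # Actor and critic consume the same observation keys (including the "terrain"
    # heightmap) but differ in capacity: 6 vs. 4 MLP layers of 1024 units, trained
    # with separate Adam optimizers below (actor lr 2e-5, critic lr 1e-4).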
    agent_config: PPOAgentConfig = PPOAgentConfig(
        model=PPOModelConfig(
            in_keys=[
                "max_coords_obs",
                "terrain",
                "mimic_target_poses",
                "previous_actions",
            ],
            out_keys=["action", "mean_action", "neglogp", "value"],
            actor=actor_config,
            critic=critic_config,
            actor_optimizer=OptimizerConfig(_target_="torch.optim.Adam", lr=2e-5),
            critic_optimizer=OptimizerConfig(_target_="torch.optim.Adam", lr=1e-4),
        ),
        batch_size=args.batch_size,
        training_max_steps=args.training_max_steps,
        gradient_clip_val=50.0,
        clip_critic_loss=True,
        evaluator=MimicEvaluatorConfig(
            evaluation_components={
                "gt_error": gt_error_factory(threshold=0.5),
                "gr_error": gr_error_factory(),
                "max_joint_error": max_joint_error_factory(),
                "rel_body_pos_rew": relative_body_pos_rew_factory(
                    weight=1.0, sigma=0.3
                ),
                "rel_body_ori_rew": relative_body_ori_rew_factory(
                    weight=1.0, sigma=0.4
                ),
            },
            motion_weights_rules=MotionWeightsRulesConfig(
                motion_weights_update_success_discount=0.999,
                motion_weights_update_failure_discount=0,
            ),
        ),
        advantage_normalization=AdvantageNormalizationConfig(
            enabled=True, shift_mean=True, use_ema=True
        ),
    )
    return agent_config
def configure_robot_and_simulator(
    robot_cfg: RobotConfig, simulator_cfg: SimulatorConfig, args: argparse.Namespace
):
    """Configure robot and simulator for terrain training."""
    robot_cfg.update_fields(
        contact_bodies=["all_left_foot_bodies", "all_right_foot_bodies"]
    )
    if hasattr(simulator_cfg, "terrain_generator_spec"):
        simulator_cfg.terrain_generator_spec = TERRAIN_GENERATOR_SPEC


def apply_inference_overrides(
    robot_cfg: RobotConfig,
    simulator_cfg: SimulatorConfig,
    env_cfg,
    agent_cfg,
    terrain_cfg: TerrainConfig,
    motion_lib_cfg: MotionLibConfig,
    scene_lib_cfg: SceneLibConfig,
    args: argparse.Namespace,
):
    """Apply evaluation-specific overrides."""
    if hasattr(env_cfg, "termination_components") and env_cfg.termination_components:
        env_cfg.termination_components = {}
    env_cfg.max_episode_length = 1000000
    env_cfg.motion_manager.resample_on_reset = True
    env_cfg.motion_manager.init_start_prob = 1.0