cheryl-tootty committed on
Commit
a8d3014
·
verified ·
1 Parent(s): d188d3a

Upload top-level files from nv_smpl_newusda_tunereward

Browse files
Files changed (1) hide show
  1. experiment_config.py +314 -0
experiment_config.py ADDED
@@ -0,0 +1,314 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025-2026 The ProtoMotions Developers
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ #
16
+ """
17
+ NV-SMPL Mimic on Isaac Lab Terrain
18
+ ====================================
19
+
20
+ Mimic training for the NV-SMPL humanoid on terrain generated natively by
21
+ Isaac Lab at training time (no pre-export step). The robot spawns at
22
+ the exact root position encoded in the motion file (zero respawn offset).
23
+
24
+ Usage::
25
+
26
+ python protomotions/train_agent.py \\
27
+ --robot-name nv_smpl \\
28
+ --simulator isaaclab \\
29
+ --experiment-path examples/experiments/mimic/mlp_nvsmpl_terrain.py \\
30
+ --motion-file <path_to_motion_pt> \\
31
+ --num-envs 64
32
+ """
33
+ from protomotions.robot_configs.base import RobotConfig
34
+ from protomotions.simulator.base_simulator.config import SimulatorConfig
35
+ from protomotions.components.terrains.config import TerrainConfig
36
+ from protomotions.envs.base_env.config import EnvConfig
37
+ from protomotions.agents.ppo.config import PPOAgentConfig
38
+ from protomotions.components.scene_lib import SceneLibConfig
39
+ from protomotions.components.motion_lib import MotionLibConfig
40
+ import argparse
41
+
42
# Column layout: one column per stair width going up, the same widths going
# down, plus one slope-up and one slope-down column. Every sub-terrain gets
# an equal proportion so the proportions sum to exactly 1.0.
_STAIR_WIDTHS_CM = [26, 27, 28]
_N_STAIR = len(_STAIR_WIDTHS_CM)
_N_COLS = _N_STAIR * 2 + 2
_PROP = 1.0 / _N_COLS

# Parameters shared by every stair sub-terrain (both directions).
_STAIR_COMMON = dict(
    step_height_range=(0.180, 0.215),
    platform_width=2.5,
    border_width=0.75,
    holes=False,
)

# Parameters shared by both slope sub-terrains.
_SLOPE_COMMON = {
    "proportion": _PROP,
    "slope_range": (0.0, 0.3640),
    "platform_width": 2.0,
    "border_width": 0.25,
    "resolution": (256, 256),
}

# Build the sub-terrain table once; a single nested loop covers both stair
# directions instead of two copy-pasted loops.
_sub_terrains_spec = {}
for _dir_tag, _stair_target in (
    ("up", "MeshPyramidStairsTerrainCfg"),
    ("dn", "MeshInvertedPyramidStairsTerrainCfg"),
):
    for _w_cm in _STAIR_WIDTHS_CM:
        _sub_terrains_spec[f"stairs_{_dir_tag}_w{_w_cm}"] = {
            "_target_": _stair_target,
            "proportion": _PROP,
            "step_width": _w_cm / 100.0,  # centimeters -> meters
            **_STAIR_COMMON,
        }
_sub_terrains_spec["slope_up"] = {
    "_target_": "smooth_slope.MeshSmoothPyramidSlopeCfg",
    **_SLOPE_COMMON,
}
_sub_terrains_spec["slope_down"] = {
    "_target_": "smooth_slope.MeshSmoothInvertedPyramidSlopeCfg",
    **_SLOPE_COMMON,
}

# Spec handed to Isaac Lab's native terrain generator at simulator setup.
TERRAIN_GENERATOR_SPEC = {
    "size": (32.0, 32.0),          # per-tile size in meters
    "border_width": 4.0,
    "num_rows": 8,                 # difficulty levels (curriculum rows)
    "num_cols": _N_COLS,           # one column per sub-terrain above
    "horizontal_scale": 0.05,
    "vertical_scale": 0.005,
    "slope_threshold": None,
    "use_cache": False,
    "curriculum": True,
    "color_scheme": "height",
    "sub_terrains": _sub_terrains_spec,
}
89
+
90
+
91
def terrain_config(args: argparse.Namespace):
    """Build terrain configuration — Isaac Lab generates terrain natively.

    The geometric parameters are derived from ``TERRAIN_GENERATOR_SPEC`` so
    the two descriptions of the same terrain (this config and the generator
    spec handed to the simulator) cannot silently drift apart. The resulting
    values are identical to the previous hard-coded ones.
    """
    return TerrainConfig(
        use_isaaclab_generator=True,
        map_length=TERRAIN_GENERATOR_SPEC["size"][0],            # 32.0
        map_width=TERRAIN_GENERATOR_SPEC["size"][1],             # 32.0
        num_levels=TERRAIN_GENERATOR_SPEC["num_rows"],           # 8
        num_terrains=TERRAIN_GENERATOR_SPEC["num_cols"],         # 8
        horizontal_scale=TERRAIN_GENERATOR_SPEC["horizontal_scale"],
        vertical_scale=TERRAIN_GENERATOR_SPEC["vertical_scale"],
        border_size=TERRAIN_GENERATOR_SPEC["border_width"],      # 4.0
        # Zero spacing: robots spawn exactly at the motion-file root position.
        minimal_humanoid_spacing=0.0,
    )
104
+
105
+
106
def scene_lib_config(args: argparse.Namespace):
    """Build scene library configuration.

    ``scenes_file`` is an optional CLI attribute; fall back to ``None`` when
    it was never defined. ``getattr`` with a default replaces the previous
    ``hasattr``-then-access pattern (one lookup instead of two).
    """
    return SceneLibConfig(scene_file=getattr(args, "scenes_file", None))
110
+
111
+
112
def motion_lib_config(args: argparse.Namespace):
    """Build the motion library configuration from the CLI motion file."""
    # --motion-file is required by the training entry point; pass it through.
    motion_file = args.motion_file
    return MotionLibConfig(motion_file=motion_file)
115
+
116
+
117
def env_config(robot_cfg: RobotConfig, args: argparse.Namespace) -> EnvConfig:
    """Build environment configuration (training defaults).

    Assembles the mimic control, observation, termination and reward
    components and wires them into an ``EnvConfig`` with PD action control.

    Args:
        robot_cfg: Robot description used to build the PD action config.
        args: Parsed CLI arguments (unused here beyond the signature contract).

    Returns:
        EnvConfig: Fully populated training environment configuration.
    """
    # Local imports keep this experiment file importable without pulling in
    # the full environment stack at module load time.
    from protomotions.envs.motion_manager.config import MimicMotionManagerConfig
    from protomotions.envs.control.mimic_control import MimicControlConfig
    from protomotions.envs.component_factories import (
        max_coords_obs_factory,
        previous_actions_factory,
        mimic_target_poses_max_coords_factory,
        action_smoothness_factory,
        mimic_tracking_rewards_factory,
        root_axis_rew_factory,
        pow_rew_factory,
        contact_match_rew_factory,
        tracking_error_term_factory,
        relative_body_pos_rew_factory,
        relative_body_ori_rew_factory,
    )
    from protomotions.envs.action import make_pd_action_config

    # Single mimic controller; bootstraps value targets when an episode ends.
    control_components = {
        "mimic": MimicControlConfig(
            bootstrap_on_episode_end=True,
        )
    }

    # Policy observations: current state, last action, and future reference
    # poses (with velocities) to imitate.
    observation_components = {
        "max_coords_obs": max_coords_obs_factory(),
        "previous_actions": previous_actions_factory(history_steps=1),
        "mimic_target_poses": mimic_target_poses_max_coords_factory(with_velocities=True),
    }

    # Terminate an episode once tracking error exceeds 0.5 (units defined by
    # the factory — presumably meters of body-position error; confirm there).
    termination_components = {
        "tracking_error": tracking_error_term_factory(threshold=0.5),
    }

    reward_components = {
        # Penalize jerky actions (negative weight = cost).
        "action_smoothness": action_smoothness_factory(weight=-0.02),
        # Core mimic tracking terms. Prefixes presumably mean: gt = global
        # translation, gr = global rotation, gv = linear velocity,
        # gav = angular velocity, rh = root height — confirm against
        # mimic_tracking_rewards_factory. Weights scale each term; coefs are
        # the (negative) exponential sharpness.
        **mimic_tracking_rewards_factory(
            gt_weight=0.5,
            gr_weight=0.3,
            gv_weight=0.1,
            gav_weight=0.2,
            rh_weight=0.2,
            gt_coef=-5.0,
            gr_coef=-5.0,
            gv_coef=-0.5,
            gav_coef=-0.1,
            rh_coef=-100.0,
        ),
        # Separate, sharply-peaked rewards for root x (axis=0) and y (axis=1).
        "rx_rew": root_axis_rew_factory(axis=0, weight=0.2, coefficient=-100.0),
        "ry_rew": root_axis_rew_factory(axis=1, weight=0.2, coefficient=-100.0),
        # Tiny clamped energy/power penalty.
        "pow_rew": pow_rew_factory(weight=-1e-5, min_value=-0.5),
        # Penalize mismatched foot contacts; disabled during the grace period
        # right after reset.
        "contact_match_rew": contact_match_rew_factory(
            weight=-0.1, zero_during_grace_period=True
        ),
        # Relative body pose rewards (Gaussian-shaped with the given sigma).
        "rel_body_pos_rew": relative_body_pos_rew_factory(weight=1.0, sigma=0.3),
        "rel_body_ori_rew": relative_body_ori_rew_factory(weight=1.0, sigma=0.4),
    }

    return EnvConfig(
        ref_contact_smooth_window=7,
        max_episode_length=1000,
        num_state_history_steps=2,
        # Spawn at the motion-encoded root position (zero respawn offset),
        # matching the module docstring.
        use_motion_root_position=True,
        control_components=control_components,
        observation_components=observation_components,
        termination_components=termination_components,
        reward_components=reward_components,
        action_config=make_pd_action_config(robot_cfg),
        # Always start episodes from the beginning of a motion and pick a new
        # motion on every reset.
        motion_manager=MimicMotionManagerConfig(
            init_start_prob=1.0,
            resample_on_reset=True,
        ),
    )
191
+
192
+
193
def agent_config(
    robot_config: RobotConfig, env_config: EnvConfig, args: argparse.Namespace
) -> PPOAgentConfig:
    """Build agent configuration.

    Constructs a PPO agent with an MLP actor (6x1024) and MLP critic
    (4x1024), both consuming the same observation keys, plus a mimic
    evaluator with adaptive motion weighting.

    Args:
        robot_config: Robot description providing action/DOF counts.
        env_config: Environment config (unused here; kept for interface parity
            with the other experiment builders).
        args: CLI arguments supplying ``batch_size`` and ``training_max_steps``.

    Returns:
        PPOAgentConfig: Fully populated agent configuration.
    """
    # Local imports keep the agent stack out of module import time.
    from protomotions.agents.common.config import MLPWithConcatConfig, MLPLayerConfig
    from protomotions.agents.ppo.config import (
        PPOActorConfig,
        PPOModelConfig,
        AdvantageNormalizationConfig,
    )
    from protomotions.agents.base_agent.config import OptimizerConfig
    from protomotions.agents.evaluators.config import (
        MimicEvaluatorConfig,
        MotionWeightsRulesConfig,
    )
    from protomotions.envs.component_factories import (
        gt_error_factory,
        gr_error_factory,
        max_joint_error_factory,
        relative_body_pos_rew_factory,
        relative_body_ori_rew_factory,
    )

    actor_config = PPOActorConfig(
        # NOTE(review): the actor head is sized by num_dofs while the trunk
        # below uses number_of_actions — for this robot these are presumably
        # equal, but confirm; a mismatch here would break the policy head.
        num_out=robot_config.kinematic_info.num_dofs,
        # Low initial log-std => near-deterministic initial policy.
        actor_logstd=-2.9,
        in_keys=["max_coords_obs", "terrain", "mimic_target_poses", "previous_actions"],
        mu_key="actor_trunk_out",
        mu_model=MLPWithConcatConfig(
            in_keys=[
                "max_coords_obs",
                "terrain",
                "mimic_target_poses",
                "previous_actions",
            ],
            # Normalize observations and clamp to +/-5 sigma.
            normalize_obs=True,
            norm_clamp_value=5,
            out_keys=["actor_trunk_out"],
            num_out=robot_config.number_of_actions,
            layers=[MLPLayerConfig(units=1024, activation="relu") for _ in range(6)],
        ),
    )

    # Critic sees the same inputs as the actor and outputs a scalar value.
    critic_config = MLPWithConcatConfig(
        in_keys=["max_coords_obs", "terrain", "mimic_target_poses", "previous_actions"],
        out_keys=["value"],
        normalize_obs=True,
        norm_clamp_value=5,
        num_out=1,
        layers=[MLPLayerConfig(units=1024, activation="relu") for _ in range(4)],
    )

    # NOTE(review): local name shadows this function's own name; harmless
    # here since the function is not re-entered, but rename if refactoring.
    agent_config: PPOAgentConfig = PPOAgentConfig(
        model=PPOModelConfig(
            in_keys=[
                "max_coords_obs",
                "terrain",
                "mimic_target_poses",
                "previous_actions",
            ],
            out_keys=["action", "mean_action", "neglogp", "value"],
            actor=actor_config,
            critic=critic_config,
            # Asymmetric learning rates: slow actor (2e-5), faster critic (1e-4).
            actor_optimizer=OptimizerConfig(_target_="torch.optim.Adam", lr=2e-5),
            critic_optimizer=OptimizerConfig(_target_="torch.optim.Adam", lr=1e-4),
        ),
        batch_size=args.batch_size,
        training_max_steps=args.training_max_steps,
        gradient_clip_val=50.0,
        clip_critic_loss=True,
        evaluator=MimicEvaluatorConfig(
            # Metrics tracked during evaluation; gt_error shares the 0.5
            # threshold used by the training termination component.
            evaluation_components={
                "gt_error": gt_error_factory(threshold=0.5),
                "gr_error": gr_error_factory(),
                "max_joint_error": max_joint_error_factory(),
                "rel_body_pos_rew": relative_body_pos_rew_factory(
                    weight=1.0, sigma=0.3
                ),
                "rel_body_ori_rew": relative_body_ori_rew_factory(
                    weight=1.0, sigma=0.4
                ),
            },
            # Down-weight mastered motions slowly (0.999); failures reset the
            # discount to 0 so failed motions are re-sampled aggressively.
            motion_weights_rules=MotionWeightsRulesConfig(
                motion_weights_update_success_discount=0.999,
                motion_weights_update_failure_discount=0,
            ),
        ),
        advantage_normalization=AdvantageNormalizationConfig(
            enabled=True, shift_mean=True, use_ema=True
        ),
    )
    return agent_config
285
+
286
+
287
def configure_robot_and_simulator(
    robot_cfg: RobotConfig, simulator_cfg: SimulatorConfig, args: argparse.Namespace
):
    """Configure robot and simulator for terrain training."""
    # Track contacts on the feet only.
    foot_bodies = ["all_left_foot_bodies", "all_right_foot_bodies"]
    robot_cfg.update_fields(contact_bodies=foot_bodies)

    # Only simulator backends that expose a native generator spec (Isaac Lab)
    # receive the terrain description; others are left untouched.
    if hasattr(simulator_cfg, "terrain_generator_spec"):
        simulator_cfg.terrain_generator_spec = TERRAIN_GENERATOR_SPEC
296
+
297
+
298
def apply_inference_overrides(
    robot_cfg: RobotConfig,
    simulator_cfg: SimulatorConfig,
    env_cfg,
    agent_cfg,
    terrain_cfg: TerrainConfig,
    motion_lib_cfg: MotionLibConfig,
    scene_lib_cfg: SceneLibConfig,
    args: argparse.Namespace,
):
    """Apply evaluation-specific overrides."""
    # Evaluation should never terminate early: drop every termination
    # component if any are configured.
    if getattr(env_cfg, "termination_components", None):
        env_cfg.termination_components = {}

    # Effectively unbounded episode length for rollout/evaluation.
    env_cfg.max_episode_length = 1000000

    # Resample a motion on every reset and always begin from its start
    # (init_start_prob=1.0 — presumably "start at t=0"; confirm in the
    # motion manager).
    env_cfg.motion_manager.resample_on_reset = True
    env_cfg.motion_manager.init_start_prob = 1.0