cheryl-tootty committed on
Commit
a8d3014
·
verified ·
1 Parent(s): d188d3a

Upload top-level files from nv_smpl_newusda_tunereward

Browse files
Files changed (1) hide show
  1. experiment_config.py +314 -0
experiment_config.py ADDED
@@ -0,0 +1,314 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025-2026 The ProtoMotions Developers
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ #
16
+ """
17
+ NV-SMPL Mimic on Isaac Lab Terrain
18
+ ====================================
19
+
20
+ Mimic training for the NV-SMPL humanoid on terrain generated natively by
21
+ Isaac Lab at training time (no pre-export step). The robot spawns at
22
+ the exact root position encoded in the motion file (zero respawn offset).
23
+
24
+ Usage::
25
+
26
+ python protomotions/train_agent.py \\
27
+ --robot-name nv_smpl \\
28
+ --simulator isaaclab \\
29
+ --experiment-path examples/experiments/mimic/mlp_nvsmpl_terrain.py \\
30
+ --motion-file <path_to_motion_pt> \\
31
+ --num-envs 64
32
+ """
33
+ from protomotions.robot_configs.base import RobotConfig
34
+ from protomotions.simulator.base_simulator.config import SimulatorConfig
35
+ from protomotions.components.terrains.config import TerrainConfig
36
+ from protomotions.envs.base_env.config import EnvConfig
37
+ from protomotions.agents.ppo.config import PPOAgentConfig
38
+ from protomotions.components.scene_lib import SceneLibConfig
39
+ from protomotions.components.motion_lib import MotionLibConfig
40
+ import argparse
41
+
42
# Column layout: one column per stair width going up, the same widths going
# down, plus one slope-up and one slope-down column. Every sub-terrain gets
# an equal proportion so the proportions sum to exactly 1.0.
_STAIR_WIDTHS_CM = [26, 27, 28]
_N_STAIR = len(_STAIR_WIDTHS_CM)
_N_COLS = _N_STAIR * 2 + 2
_PROP = 1.0 / _N_COLS

# Parameters shared by every stair sub-terrain (both directions).
_STAIR_COMMON = dict(
    step_height_range=(0.180, 0.215),
    platform_width=2.5,
    border_width=0.75,
    holes=False,
)

# Parameters shared by both slope sub-terrains.
_SLOPE_COMMON = {
    "proportion": _PROP,
    "slope_range": (0.0, 0.3640),
    "platform_width": 2.0,
    "border_width": 0.25,
    "resolution": (256, 256),
}

# Build the sub-terrain table once; a single nested loop covers both stair
# directions instead of two copy-pasted loops.
_sub_terrains_spec = {}
for _dir_tag, _stair_target in (
    ("up", "MeshPyramidStairsTerrainCfg"),
    ("dn", "MeshInvertedPyramidStairsTerrainCfg"),
):
    for _w_cm in _STAIR_WIDTHS_CM:
        _sub_terrains_spec[f"stairs_{_dir_tag}_w{_w_cm}"] = {
            "_target_": _stair_target,
            "proportion": _PROP,
            "step_width": _w_cm / 100.0,  # centimeters -> meters
            **_STAIR_COMMON,
        }
_sub_terrains_spec["slope_up"] = {
    "_target_": "smooth_slope.MeshSmoothPyramidSlopeCfg",
    **_SLOPE_COMMON,
}
_sub_terrains_spec["slope_down"] = {
    "_target_": "smooth_slope.MeshSmoothInvertedPyramidSlopeCfg",
    **_SLOPE_COMMON,
}

# Spec handed to Isaac Lab's native terrain generator at simulator setup.
TERRAIN_GENERATOR_SPEC = {
    "size": (32.0, 32.0),          # per-tile size in meters
    "border_width": 4.0,
    "num_rows": 8,                 # difficulty levels (curriculum rows)
    "num_cols": _N_COLS,           # one column per sub-terrain above
    "horizontal_scale": 0.05,
    "vertical_scale": 0.005,
    "slope_threshold": None,
    "use_cache": False,
    "curriculum": True,
    "color_scheme": "height",
    "sub_terrains": _sub_terrains_spec,
}
89
+
90
+
91
def terrain_config(args: argparse.Namespace):
    """Build terrain configuration — Isaac Lab generates terrain natively.

    The geometric parameters are derived from ``TERRAIN_GENERATOR_SPEC`` so
    the two descriptions of the same terrain (this config and the generator
    spec handed to the simulator) cannot silently drift apart. The resulting
    values are identical to the previous hard-coded ones.
    """
    return TerrainConfig(
        use_isaaclab_generator=True,
        map_length=TERRAIN_GENERATOR_SPEC["size"][0],            # 32.0
        map_width=TERRAIN_GENERATOR_SPEC["size"][1],             # 32.0
        num_levels=TERRAIN_GENERATOR_SPEC["num_rows"],           # 8
        num_terrains=TERRAIN_GENERATOR_SPEC["num_cols"],         # 8
        horizontal_scale=TERRAIN_GENERATOR_SPEC["horizontal_scale"],
        vertical_scale=TERRAIN_GENERATOR_SPEC["vertical_scale"],
        border_size=TERRAIN_GENERATOR_SPEC["border_width"],      # 4.0
        # Zero spacing: robots spawn exactly at the motion-file root position.
        minimal_humanoid_spacing=0.0,
    )
104
+
105
+
106
def scene_lib_config(args: argparse.Namespace):
    """Build scene library configuration.

    ``scenes_file`` is an optional CLI attribute; fall back to ``None`` when
    it was never defined. ``getattr`` with a default replaces the previous
    ``hasattr``-then-access pattern (one lookup instead of two).
    """
    return SceneLibConfig(scene_file=getattr(args, "scenes_file", None))
110
+
111
+
112
def motion_lib_config(args: argparse.Namespace):
    """Build the motion library configuration from the CLI motion file."""
    # --motion-file is required by the training entry point; pass it through.
    motion_file = args.motion_file
    return MotionLibConfig(motion_file=motion_file)
115
+
116
+
117
def env_config(robot_cfg: RobotConfig, args: argparse.Namespace) -> EnvConfig:
    """Build environment configuration (training defaults).

    Assembles the mimic control, observation, termination and reward
    components and wires them into an ``EnvConfig`` with PD action control.

    Args:
        robot_cfg: Robot description used to build the PD action config.
        args: Parsed CLI arguments (unused here beyond the signature contract).

    Returns:
        EnvConfig: Fully populated training environment configuration.
    """
    # Local imports keep this experiment file importable without pulling in
    # the full environment stack at module load time.
    from protomotions.envs.motion_manager.config import MimicMotionManagerConfig
    from protomotions.envs.control.mimic_control import MimicControlConfig
    from protomotions.envs.component_factories import (
        max_coords_obs_factory,
        previous_actions_factory,
        mimic_target_poses_max_coords_factory,
        action_smoothness_factory,
        mimic_tracking_rewards_factory,
        root_axis_rew_factory,
        pow_rew_factory,
        contact_match_rew_factory,
        tracking_error_term_factory,
        relative_body_pos_rew_factory,
        relative_body_ori_rew_factory,
    )
    from protomotions.envs.action import make_pd_action_config

    # Single mimic controller; bootstraps value targets when an episode ends.
    control_components = {
        "mimic": MimicControlConfig(
            bootstrap_on_episode_end=True,
        )
    }

    # Policy observations: current state, last action, and future reference
    # poses (with velocities) to imitate.
    observation_components = {
        "max_coords_obs": max_coords_obs_factory(),
        "previous_actions": previous_actions_factory(history_steps=1),
        "mimic_target_poses": mimic_target_poses_max_coords_factory(with_velocities=True),
    }

    # Terminate an episode once tracking error exceeds 0.5 (units defined by
    # the factory — presumably meters of body-position error; confirm there).
    termination_components = {
        "tracking_error": tracking_error_term_factory(threshold=0.5),
    }

    reward_components = {
        # Penalize jerky actions (negative weight = cost).
        "action_smoothness": action_smoothness_factory(weight=-0.02),
        # Core mimic tracking terms. Prefixes presumably mean: gt = global
        # translation, gr = global rotation, gv = linear velocity,
        # gav = angular velocity, rh = root height — confirm against
        # mimic_tracking_rewards_factory. Weights scale each term; coefs are
        # the (negative) exponential sharpness.
        **mimic_tracking_rewards_factory(
            gt_weight=0.5,
            gr_weight=0.3,
            gv_weight=0.1,
            gav_weight=0.2,
            rh_weight=0.2,
            gt_coef=-5.0,
            gr_coef=-5.0,
            gv_coef=-0.5,
            gav_coef=-0.1,
            rh_coef=-100.0,
        ),
        # Separate, sharply-peaked rewards for root x (axis=0) and y (axis=1).
        "rx_rew": root_axis_rew_factory(axis=0, weight=0.2, coefficient=-100.0),
        "ry_rew": root_axis_rew_factory(axis=1, weight=0.2, coefficient=-100.0),
        # Tiny clamped energy/power penalty.
        "pow_rew": pow_rew_factory(weight=-1e-5, min_value=-0.5),
        # Penalize mismatched foot contacts; disabled during the grace period
        # right after reset.
        "contact_match_rew": contact_match_rew_factory(
            weight=-0.1, zero_during_grace_period=True
        ),
        # Relative body pose rewards (Gaussian-shaped with the given sigma).
        "rel_body_pos_rew": relative_body_pos_rew_factory(weight=1.0, sigma=0.3),
        "rel_body_ori_rew": relative_body_ori_rew_factory(weight=1.0, sigma=0.4),
    }

    return EnvConfig(
        ref_contact_smooth_window=7,
        max_episode_length=1000,
        num_state_history_steps=2,
        # Spawn at the motion-encoded root position (zero respawn offset),
        # matching the module docstring.
        use_motion_root_position=True,
        control_components=control_components,
        observation_components=observation_components,
        termination_components=termination_components,
        reward_components=reward_components,
        action_config=make_pd_action_config(robot_cfg),
        # Always start episodes from the beginning of a motion and pick a new
        # motion on every reset.
        motion_manager=MimicMotionManagerConfig(
            init_start_prob=1.0,
            resample_on_reset=True,
        ),
    )
191
+
192
+
193
def agent_config(
    robot_config: RobotConfig, env_config: EnvConfig, args: argparse.Namespace
) -> PPOAgentConfig:
    """Build agent configuration.

    Constructs a PPO agent with an MLP actor (6x1024) and MLP critic
    (4x1024), both consuming the same observation keys, plus a mimic
    evaluator with adaptive motion weighting.

    Args:
        robot_config: Robot description providing action/DOF counts.
        env_config: Environment config (unused here; kept for interface parity
            with the other experiment builders).
        args: CLI arguments supplying ``batch_size`` and ``training_max_steps``.

    Returns:
        PPOAgentConfig: Fully populated agent configuration.
    """
    # Local imports keep the agent stack out of module import time.
    from protomotions.agents.common.config import MLPWithConcatConfig, MLPLayerConfig
    from protomotions.agents.ppo.config import (
        PPOActorConfig,
        PPOModelConfig,
        AdvantageNormalizationConfig,
    )
    from protomotions.agents.base_agent.config import OptimizerConfig
    from protomotions.agents.evaluators.config import (
        MimicEvaluatorConfig,
        MotionWeightsRulesConfig,
    )
    from protomotions.envs.component_factories import (
        gt_error_factory,
        gr_error_factory,
        max_joint_error_factory,
        relative_body_pos_rew_factory,
        relative_body_ori_rew_factory,
    )

    actor_config = PPOActorConfig(
        # NOTE(review): the actor head is sized by num_dofs while the trunk
        # below uses number_of_actions — for this robot these are presumably
        # equal, but confirm; a mismatch here would break the policy head.
        num_out=robot_config.kinematic_info.num_dofs,
        # Low initial log-std => near-deterministic initial policy.
        actor_logstd=-2.9,
        in_keys=["max_coords_obs", "terrain", "mimic_target_poses", "previous_actions"],
        mu_key="actor_trunk_out",
        mu_model=MLPWithConcatConfig(
            in_keys=[
                "max_coords_obs",
                "terrain",
                "mimic_target_poses",
                "previous_actions",
            ],
            # Normalize observations and clamp to +/-5 sigma.
            normalize_obs=True,
            norm_clamp_value=5,
            out_keys=["actor_trunk_out"],
            num_out=robot_config.number_of_actions,
            layers=[MLPLayerConfig(units=1024, activation="relu") for _ in range(6)],
        ),
    )

    # Critic sees the same inputs as the actor and outputs a scalar value.
    critic_config = MLPWithConcatConfig(
        in_keys=["max_coords_obs", "terrain", "mimic_target_poses", "previous_actions"],
        out_keys=["value"],
        normalize_obs=True,
        norm_clamp_value=5,
        num_out=1,
        layers=[MLPLayerConfig(units=1024, activation="relu") for _ in range(4)],
    )

    # NOTE(review): local name shadows this function's own name; harmless
    # here since the function is not re-entered, but rename if refactoring.
    agent_config: PPOAgentConfig = PPOAgentConfig(
        model=PPOModelConfig(
            in_keys=[
                "max_coords_obs",
                "terrain",
                "mimic_target_poses",
                "previous_actions",
            ],
            out_keys=["action", "mean_action", "neglogp", "value"],
            actor=actor_config,
            critic=critic_config,
            # Asymmetric learning rates: slow actor (2e-5), faster critic (1e-4).
            actor_optimizer=OptimizerConfig(_target_="torch.optim.Adam", lr=2e-5),
            critic_optimizer=OptimizerConfig(_target_="torch.optim.Adam", lr=1e-4),
        ),
        batch_size=args.batch_size,
        training_max_steps=args.training_max_steps,
        gradient_clip_val=50.0,
        clip_critic_loss=True,
        evaluator=MimicEvaluatorConfig(
            # Metrics tracked during evaluation; gt_error shares the 0.5
            # threshold used by the training termination component.
            evaluation_components={
                "gt_error": gt_error_factory(threshold=0.5),
                "gr_error": gr_error_factory(),
                "max_joint_error": max_joint_error_factory(),
                "rel_body_pos_rew": relative_body_pos_rew_factory(
                    weight=1.0, sigma=0.3
                ),
                "rel_body_ori_rew": relative_body_ori_rew_factory(
                    weight=1.0, sigma=0.4
                ),
            },
            # Down-weight mastered motions slowly (0.999); failures reset the
            # discount to 0 so failed motions are re-sampled aggressively.
            motion_weights_rules=MotionWeightsRulesConfig(
                motion_weights_update_success_discount=0.999,
                motion_weights_update_failure_discount=0,
            ),
        ),
        advantage_normalization=AdvantageNormalizationConfig(
            enabled=True, shift_mean=True, use_ema=True
        ),
    )
    return agent_config
285
+
286
+
287
def configure_robot_and_simulator(
    robot_cfg: RobotConfig, simulator_cfg: SimulatorConfig, args: argparse.Namespace
):
    """Configure robot and simulator for terrain training."""
    # Track contacts on the feet only.
    foot_bodies = ["all_left_foot_bodies", "all_right_foot_bodies"]
    robot_cfg.update_fields(contact_bodies=foot_bodies)

    # Only simulator backends that expose a native generator spec (Isaac Lab)
    # receive the terrain description; others are left untouched.
    if hasattr(simulator_cfg, "terrain_generator_spec"):
        simulator_cfg.terrain_generator_spec = TERRAIN_GENERATOR_SPEC
296
+
297
+
298
def apply_inference_overrides(
    robot_cfg: RobotConfig,
    simulator_cfg: SimulatorConfig,
    env_cfg,
    agent_cfg,
    terrain_cfg: TerrainConfig,
    motion_lib_cfg: MotionLibConfig,
    scene_lib_cfg: SceneLibConfig,
    args: argparse.Namespace,
):
    """Apply evaluation-specific overrides."""
    # Evaluation should never terminate early: drop every termination
    # component if any are configured.
    if getattr(env_cfg, "termination_components", None):
        env_cfg.termination_components = {}

    # Effectively unbounded episode length for rollout/evaluation.
    env_cfg.max_episode_length = 1000000

    # Resample a motion on every reset and always begin from its start
    # (init_start_prob=1.0 — presumably "start at t=0"; confirm in the
    # motion manager).
    env_cfg.motion_manager.resample_on_reset = True
    env_cfg.motion_manager.init_start_prob = 1.0