# SPDX-FileCopyrightText: Copyright (c) 2025-2026 The ProtoMotions Developers
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""
NV-SMPL Mimic on Isaac Lab Terrain
====================================

Mimic training for the NV-SMPL humanoid on terrain generated natively by
Isaac Lab at training time (no pre-export step). The robot spawns at the
exact root position encoded in the motion file (zero respawn offset).

Usage::

    python protomotions/train_agent.py \\
        --robot-name nv_smpl \\
        --simulator isaaclab \\
        --experiment-path examples/experiments/mimic/mlp_nvsmpl_terrain.py \\
        --motion-file <path/to/motion_file> \\
        --num-envs 64
"""

from protomotions.robot_configs.base import RobotConfig
from protomotions.simulator.base_simulator.config import SimulatorConfig
from protomotions.components.terrains.config import TerrainConfig
from protomotions.envs.base_env.config import EnvConfig
from protomotions.agents.ppo.config import PPOAgentConfig
from protomotions.components.scene_lib import SceneLibConfig
from protomotions.components.motion_lib import MotionLibConfig
import argparse


# --------------------------------------------------------------------------- #
# Terrain layout: one column per sub-terrain type, uniform proportions.
# Columns = (3 stair widths) x (up + down) + (slope up + slope down) = 8.
# --------------------------------------------------------------------------- #
_STAIR_WIDTHS_CM = [26, 27, 28]
_N_STAIR = len(_STAIR_WIDTHS_CM)
_N_COLS = _N_STAIR * 2 + 2
_PROP = 1.0 / _N_COLS  # equal share of the generator per sub-terrain

# Shared stair parameters; step height range spans ~18.0-21.5 cm.
_STAIR_COMMON = dict(
    step_height_range=(0.180, 0.215),
    platform_width=2.5,
    border_width=0.75,
    holes=False,
)

# Shared slope parameters; 0.3640 rad is roughly a 20-degree incline cap.
_SLOPE_COMMON = dict(
    proportion=_PROP,
    slope_range=(0.0, 0.3640),
    platform_width=2.0,
    border_width=0.25,
    resolution=(256, 256),
)

# Build the sub-terrain registry. Insertion order matters for column layout:
# stairs up (all widths), stairs down (all widths), slope up, slope down.
_sub_terrains_spec = {}
for _prefix, _target in (
    ("stairs_up", "MeshPyramidStairsTerrainCfg"),
    ("stairs_dn", "MeshInvertedPyramidStairsTerrainCfg"),
):
    for _w_cm in _STAIR_WIDTHS_CM:
        _sub_terrains_spec[f"{_prefix}_w{_w_cm}"] = {
            "_target_": _target,
            "proportion": _PROP,
            "step_width": _w_cm / 100.0,
            **_STAIR_COMMON,
        }
for _name, _target in (
    ("slope_up", "smooth_slope.MeshSmoothPyramidSlopeCfg"),
    ("slope_down", "smooth_slope.MeshSmoothInvertedPyramidSlopeCfg"),
):
    _sub_terrains_spec[_name] = {
        "_target_": _target,
        **_SLOPE_COMMON,
    }

# Spec handed to Isaac Lab's native terrain generator at sim startup.
TERRAIN_GENERATOR_SPEC = {
    "size": (32.0, 32.0),
    "border_width": 4.0,
    "num_rows": 8,
    "num_cols": _N_COLS,
    "horizontal_scale": 0.05,
    "vertical_scale": 0.005,
    "slope_threshold": None,
    "use_cache": False,
    "curriculum": True,
    "color_scheme": "height",
    "sub_terrains": _sub_terrains_spec,
}


def terrain_config(args: argparse.Namespace):
    """Build terrain configuration — Isaac Lab generates terrain natively.

    Dimensions here mirror TERRAIN_GENERATOR_SPEC (32 m tiles, 8x8 grid,
    4 m border) so ProtoMotions' terrain bookkeeping matches the generated
    mesh.
    """
    return TerrainConfig(
        use_isaaclab_generator=True,
        map_length=32.0,
        map_width=32.0,
        num_levels=8,
        num_terrains=8,
        horizontal_scale=0.05,
        vertical_scale=0.005,
        border_size=4.0,
        minimal_humanoid_spacing=0.0,
    )


def scene_lib_config(args: argparse.Namespace):
    """Build scene library configuration (no scenes unless provided)."""
    # getattr with a default replaces the hasattr/ternary dance.
    return SceneLibConfig(scene_file=getattr(args, "scenes_file", None))


def motion_lib_config(args: argparse.Namespace):
    """Build motion library configuration from the CLI-supplied motion file."""
    return MotionLibConfig(motion_file=args.motion_file)


def env_config(robot_cfg: RobotConfig, args: argparse.Namespace) -> EnvConfig:
    """Build environment configuration (training defaults).

    Mimic tracking setup: max-coordinate observations plus target poses,
    tracking-error termination, and a weighted sum of tracking/regularizer
    rewards.
    """
    from protomotions.envs.motion_manager.config import MimicMotionManagerConfig
    from protomotions.envs.control.mimic_control import MimicControlConfig
    from protomotions.envs.component_factories import (
        max_coords_obs_factory,
        previous_actions_factory,
        mimic_target_poses_max_coords_factory,
        action_smoothness_factory,
        mimic_tracking_rewards_factory,
        root_axis_rew_factory,
        pow_rew_factory,
        contact_match_rew_factory,
        tracking_error_term_factory,
        relative_body_pos_rew_factory,
        relative_body_ori_rew_factory,
    )
    from protomotions.envs.action import make_pd_action_config

    control_components = {
        "mimic": MimicControlConfig(
            bootstrap_on_episode_end=True,
        )
    }

    observation_components = {
        "max_coords_obs": max_coords_obs_factory(),
        "previous_actions": previous_actions_factory(history_steps=1),
        "mimic_target_poses": mimic_target_poses_max_coords_factory(
            with_velocities=True
        ),
    }

    termination_components = {
        # Episode ends when tracking error exceeds 0.5.
        "tracking_error": tracking_error_term_factory(threshold=0.5),
    }

    reward_components = {
        # Negative weights below are penalties.
        "action_smoothness": action_smoothness_factory(weight=-0.02),
        **mimic_tracking_rewards_factory(
            gt_weight=0.5,
            gr_weight=0.3,
            gv_weight=0.1,
            gav_weight=0.2,
            rh_weight=0.2,
            gt_coef=-5.0,
            gr_coef=-5.0,
            gv_coef=-0.5,
            gav_coef=-0.1,
            rh_coef=-100.0,
        ),
        "rx_rew": root_axis_rew_factory(axis=0, weight=0.2, coefficient=-100.0),
        "ry_rew": root_axis_rew_factory(axis=1, weight=0.2, coefficient=-100.0),
        "pow_rew": pow_rew_factory(weight=-1e-5, min_value=-0.5),
        "contact_match_rew": contact_match_rew_factory(
            weight=-0.1, zero_during_grace_period=True
        ),
        "rel_body_pos_rew": relative_body_pos_rew_factory(weight=1.0, sigma=0.3),
        "rel_body_ori_rew": relative_body_ori_rew_factory(weight=1.0, sigma=0.4),
    }

    return EnvConfig(
        ref_contact_smooth_window=7,
        max_episode_length=1000,
        num_state_history_steps=2,
        # Spawn at the root position encoded in the motion (zero offset).
        use_motion_root_position=True,
        control_components=control_components,
        observation_components=observation_components,
        termination_components=termination_components,
        reward_components=reward_components,
        action_config=make_pd_action_config(robot_cfg),
        motion_manager=MimicMotionManagerConfig(
            init_start_prob=1.0,
            resample_on_reset=True,
        ),
    )


def agent_config(
    robot_config: RobotConfig, env_config: EnvConfig, args: argparse.Namespace
) -> PPOAgentConfig:
    """Build agent configuration.

    PPO with MLP actor/critic over concatenated observation streams
    (proprioception, terrain heights, mimic targets, previous actions).
    """
    from protomotions.agents.common.config import MLPWithConcatConfig, MLPLayerConfig
    from protomotions.agents.ppo.config import (
        PPOActorConfig,
        PPOModelConfig,
        AdvantageNormalizationConfig,
    )
    from protomotions.agents.base_agent.config import OptimizerConfig
    from protomotions.agents.evaluators.config import (
        MimicEvaluatorConfig,
        MotionWeightsRulesConfig,
    )
    from protomotions.envs.component_factories import (
        gt_error_factory,
        gr_error_factory,
        max_joint_error_factory,
        relative_body_pos_rew_factory,
        relative_body_ori_rew_factory,
    )

    # All network heads consume the same concatenated observation set.
    obs_keys = ["max_coords_obs", "terrain", "mimic_target_poses", "previous_actions"]

    actor_config = PPOActorConfig(
        num_out=robot_config.kinematic_info.num_dofs,
        actor_logstd=-2.9,
        in_keys=list(obs_keys),
        mu_key="actor_trunk_out",
        mu_model=MLPWithConcatConfig(
            in_keys=list(obs_keys),
            normalize_obs=True,
            norm_clamp_value=5,
            out_keys=["actor_trunk_out"],
            num_out=robot_config.number_of_actions,
            layers=[MLPLayerConfig(units=1024, activation="relu") for _ in range(6)],
        ),
    )

    critic_config = MLPWithConcatConfig(
        in_keys=list(obs_keys),
        out_keys=["value"],
        normalize_obs=True,
        norm_clamp_value=5,
        num_out=1,
        layers=[MLPLayerConfig(units=1024, activation="relu") for _ in range(4)],
    )

    # Named `ppo_config` (not `agent_config`) to avoid shadowing this function.
    ppo_config: PPOAgentConfig = PPOAgentConfig(
        model=PPOModelConfig(
            in_keys=list(obs_keys),
            out_keys=["action", "mean_action", "neglogp", "value"],
            actor=actor_config,
            critic=critic_config,
            actor_optimizer=OptimizerConfig(_target_="torch.optim.Adam", lr=2e-5),
            critic_optimizer=OptimizerConfig(_target_="torch.optim.Adam", lr=1e-4),
        ),
        batch_size=args.batch_size,
        training_max_steps=args.training_max_steps,
        gradient_clip_val=50.0,
        clip_critic_loss=True,
        evaluator=MimicEvaluatorConfig(
            evaluation_components={
                "gt_error": gt_error_factory(threshold=0.5),
                "gr_error": gr_error_factory(),
                "max_joint_error": max_joint_error_factory(),
                "rel_body_pos_rew": relative_body_pos_rew_factory(
                    weight=1.0, sigma=0.3
                ),
                "rel_body_ori_rew": relative_body_ori_rew_factory(
                    weight=1.0, sigma=0.4
                ),
            },
            motion_weights_rules=MotionWeightsRulesConfig(
                motion_weights_update_success_discount=0.999,
                motion_weights_update_failure_discount=0,
            ),
        ),
        advantage_normalization=AdvantageNormalizationConfig(
            enabled=True, shift_mean=True, use_ema=True
        ),
    )
    return ppo_config


def configure_robot_and_simulator(
    robot_cfg: RobotConfig, simulator_cfg: SimulatorConfig, args: argparse.Namespace
):
    """Configure robot and simulator for terrain training.

    Restricts contact bodies to the feet and hands the terrain spec to the
    simulator when it supports native generation.
    """
    robot_cfg.update_fields(
        contact_bodies=["all_left_foot_bodies", "all_right_foot_bodies"]
    )
    # Only simulators exposing this field (Isaac Lab) receive the spec.
    if hasattr(simulator_cfg, "terrain_generator_spec"):
        simulator_cfg.terrain_generator_spec = TERRAIN_GENERATOR_SPEC


def apply_inference_overrides(
    robot_cfg: RobotConfig,
    simulator_cfg: SimulatorConfig,
    env_cfg,
    agent_cfg,
    terrain_cfg: TerrainConfig,
    motion_lib_cfg: MotionLibConfig,
    scene_lib_cfg: SceneLibConfig,
    args: argparse.Namespace,
):
    """Apply evaluation-specific overrides.

    Disables terminations and effectively removes the episode-length cap so
    evaluation rollouts run motions uninterrupted; always restarts motions
    from the beginning.
    """
    if hasattr(env_cfg, "termination_components") and env_cfg.termination_components:
        env_cfg.termination_components = {}
    env_cfg.max_episode_length = 1000000
    env_cfg.motion_manager.resample_on_reset = True
    env_cfg.motion_manager.init_start_prob = 1.0