from __future__ import annotations

import json
import os
import sys
from pathlib import Path
from typing import Any

import gradio as gr
import requests

# Address of the API server that every UI callback talks to, and the
# per-request timeout in seconds.
BASE_URL = "http://localhost:7860"
TIMEOUT = 45

# Directory containing this file; the training-curve image is looked up here.
_SC_DIR = Path(__file__).resolve().parent
_REWARD_IMAGE = _SC_DIR / "reward_curves.png"

# Dropdown entries are (label shown in the UI, value sent to the server).
SCENARIO_CHOICES = [
    (f"Scenario {number}", f"scenario_{number:02d}") for number in range(1, 9)
]

AGENT_CHOICES = [
    ("IT Agent", "it_agent"),
    ("Manager Agent", "manager_agent"),
    ("Finance Agent", "finance_agent"),
    ("Oversight Agent", "oversight_agent"),
]

TOOL_CHOICES = [
    ("Get Tickets", "get_tickets"),
    ("Resolve Ticket", "resolve_ticket"),
    ("Allocate Resource", "allocate_resource"),
    ("Approve Budget", "approve_budget"),
    ("Get Project Status", "get_project_status"),
]

# JSON text pre-filled into the "Tool params JSON" box for each tool.
# Templates are rendered with a two-space indent; an empty template renders
# as "{}".
TOOL_PARAM_PRESETS = {
    tool_name: json.dumps(template, indent=2)
    for tool_name, template in {
        "get_tickets": {},
        "resolve_ticket": {"ticket_id": ""},
        "allocate_resource": {
            "resource_type": "engineers",
            "amount": 1,
            "requester_agent": "",
        },
        "approve_budget": {
            "amount": 1000,
            "justification": "",
            "requester_agent": "",
            "manager_countersign": False,
        },
        "get_project_status": {},
    }.items()
}


def _pretty(data: Any) -> str:
    """Serialize ``data`` as human-readable JSON for display in a textbox.

    Keys are sorted, nesting is indented by two spaces, and non-ASCII
    characters are written as-is instead of being escaped.
    """
    dump_options = {"sort_keys": True, "indent": 2, "ensure_ascii": False}
    return json.dumps(data, **dump_options)


def _request(method: str, path: str, payload: dict[str, Any] | None = None) -> dict[str, Any]:
    """Send one HTTP request to the API server and return the decoded JSON body.

    Args:
        method: HTTP verb passed through to ``requests.request``.
        path: Path appended to ``BASE_URL`` (for example ``"/reset"``).
        payload: Optional object sent as the JSON request body.

    Returns:
        The response body parsed by ``response.json()``.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    endpoint = f"{BASE_URL}{path}"
    reply = requests.request(method, endpoint, json=payload, timeout=TIMEOUT)
    # Turn 4xx/5xx answers into exceptions before trying to decode the body.
    reply.raise_for_status()
    return reply.json()


def _default_status(use_trained: bool) -> str:
    """Return the fallback model-status text for the given checkbox state.

    Used when the server reply carries no status of its own.
    """
    if not use_trained:
        return "Rule-based agents active"
    return "Trained mode selected (applies on next server contact)"


def _reset_episode(
    use_trained: bool,
    scenario_name: str,
) -> tuple[str, str, str, str]:
    """Start a new episode through ``POST /reset`` and format the reply.

    Args:
        use_trained: Forwarded to the server as ``use_trained_model``.
        scenario_name: Forwarded to the server as ``scenario``.

    Returns:
        A 4-tuple: the pretty-printed observation (twice, once for each
        observation textbox), the episode status ``"Active"``, and the model
        status text.
    """
    reset_body = {"scenario": scenario_name, "use_trained_model": use_trained}
    reply = _request("post", "/reset", reset_body)

    observation_text = _pretty(reply.get("observation", {}))

    # Prefer the status reported by the server; otherwise derive one locally.
    if "it_agent_status" in reply:
        model_state = reply["it_agent_status"]
    else:
        model_state = _default_status(use_trained)

    return observation_text, observation_text, "Active", model_state


def _step_episode(
    use_trained: bool,
    agent_id: str,
    tool_call: str,
    tool_params_json: str,
    message_to: str,
    message_content: str,
    reasoning: str,
) -> tuple[str, str, str, str, str]:
    """Submit one agent action through ``POST /step`` and format the reply.

    Args:
        use_trained: Forwarded to the server as ``use_trained_model``.
        agent_id: Identifier of the acting agent.
        tool_call: Tool name; an empty value is sent as ``None``.
        tool_params_json: JSON text for the tool arguments. Blank text means
            no arguments; anything else must decode to a JSON object.
        message_to: Optional message recipient; empty is sent as ``None``.
        message_content: Optional message body; empty is sent as ``None``.
        reasoning: Optional free-text rationale; empty is sent as ``None``.

    Returns:
        A 5-tuple: the pretty-printed observation (twice, once for each
        observation textbox), the reward formatted to three decimals, the
        episode status (``"Done"`` or ``"Active"``), and the model status.
        If the tool params cannot be parsed, the error message takes the
        place of both observations, the reward is ``"0.0"``, and no request
        is sent.
    """
    # Validate user-typed JSON first. Any parse problem is reported in the UI
    # instead of being raised, so the broad catch is deliberate here.
    try:
        tool_params = json.loads(tool_params_json) if tool_params_json.strip() else {}
        if not isinstance(tool_params, dict):
            raise ValueError("Tool params must decode to a JSON object.")
    except Exception as exc:
        error_text = f"Invalid tool params JSON: {exc}"
        return error_text, error_text, "0.0", "Active", _default_status(use_trained)

    # Empty optional fields are normalised to None (JSON null).
    payload: dict[str, Any] = {
        "agent_id": agent_id,
        "use_trained_model": use_trained,
        "tool_call": tool_call or None,
        "tool_params": tool_params,
        "message_to": message_to or None,
        "message_content": message_content or None,
        "reasoning": reasoning or None,
    }
    data = _request("post", "/step", payload)
    observation = data.get("observation", {})
    formatted = _pretty(observation)

    # The reward key may be present but null; ``dict.get``'s default only
    # covers a missing key, and formatting ``None`` with ".3f" raises
    # TypeError. Treat both a missing and a null reward as 0.0.
    reward_value = data.get("reward")
    if reward_value is None:
        reward_value = 0.0
    reward = f"{float(reward_value):.3f}"

    done = data.get("done", False)
    ep_status = "Done" if done else "Active"
    # ``info`` may be missing or null; fall back to the local default text.
    status = (data.get("info") or {}).get("it_agent_status") or _default_status(use_trained)
    return formatted, formatted, reward, ep_status, status


def _load_world_state() -> str:
    """Fetch ``GET /state`` and return the reply as pretty-printed JSON."""
    return _pretty(_request("get", "/state"))


def _preset_tool_params(tool_call: str) -> str:
    """Return the preset params JSON for ``tool_call``, or ``"{}"`` if none exists."""
    try:
        return TOOL_PARAM_PRESETS[tool_call]
    except KeyError:
        return "{}"


# Build the Gradio UI: a header, a global model toggle, an "Arena" tab for
# driving episodes, and a "Training Evidence" tab with static results.
# Event handlers are wired at the bottom, once every component exists.
with gr.Blocks(theme=gr.themes.Monochrome(), title="EnterpriseOps Arena - Meta PyTorch OpenEnv Hackathon") as demo:
    # Page header. The trailing double spaces inside the Markdown are
    # significant: they force line breaks between the bold fields.
    gr.Markdown(
        """
        # EnterpriseOps Arena - Meta PyTorch OpenEnv Hackathon

        **Themes:** OpenEnv Themes 1 and 3.1  
        **Bonus Prizes:** Fleet AI, Halluminate, Scale AI, Scaler AI Labs, Patronus AI  
        **Team Names:** Hackathon teams and contributors
        """
    )

    with gr.Row():
        # Global toggle; its value is passed to both the reset and the step
        # handler, which forward it to the server as `use_trained_model`.
        use_trained_model = gr.Checkbox(
            label="🤖 Use Trained LoRA Model (vs Rule-based)",
            value=False,
            info="Uses Qwen2.5-3B trained on 700 steps of GRPO",
        )
        # Read-only; written by the reset and step handlers.
        model_status = gr.Textbox(
            label="Model Status",
            value="Rule-based agents active",
            interactive=False,
        )

    with gr.Tabs():
        with gr.Tab("Arena"):
            # Top row: reset controls on the left, step controls on the right.
            with gr.Row():
                with gr.Column(scale=1):
                    gr.Markdown("## Reset Panel")
                    scenario = gr.Dropdown(
                        choices=SCENARIO_CHOICES,
                        value="scenario_01",
                        label="Scenario",
                    )
                    reset_button = gr.Button("Reset Episode", variant="primary")
                    reset_observation = gr.Textbox(label="Observation", lines=12, interactive=False)

                with gr.Column(scale=1):
                    gr.Markdown("## Step Panel")
                    agent_id = gr.Dropdown(
                        choices=AGENT_CHOICES,
                        value="it_agent",
                        label="Agent",
                    )
                    tool_call = gr.Dropdown(
                        choices=TOOL_CHOICES,
                        value="get_tickets",
                        label="Tool",
                    )
                    # Starts with the preset matching the default tool above.
                    tool_params = gr.Textbox(
                        label="Tool params JSON",
                        lines=8,
                        value=_preset_tool_params("get_tickets"),
                    )
                    message_to = gr.Textbox(label="Message To", placeholder="manager_agent")
                    message_content = gr.Textbox(label="Message Content", lines=3)
                    reasoning = gr.Textbox(label="Reasoning", lines=3)
                    step_button = gr.Button("Step Episode", variant="primary")

            # Bottom row: latest results on the left, on-demand world state on the right.
            with gr.Row():
                with gr.Column(scale=1):
                    gr.Markdown("## Results Panel")
                    result_observation = gr.Textbox(label="Observation", lines=12, interactive=False)
                    reward_score = gr.Textbox(label="Reward Score", value="0.0", interactive=False)
                    episode_status = gr.Textbox(label="Episode Status", value="Active", interactive=False)

                with gr.Column(scale=1):
                    gr.Markdown("## World State")
                    state_button = gr.Button("Load World State", variant="secondary")
                    world_state = gr.Textbox(label="State", lines=20, interactive=False)

        with gr.Tab("Training Evidence"):
            gr.Markdown(
                """
                ## Real GRPO Training Results
                700 steps across 3 runs on Tesla T4 GPU
                """
            )
            # Show the reward-curve image only if the file exists next to this
            # module; otherwise show a hint telling the user where to put it.
            _img_val = str(_REWARD_IMAGE) if _REWARD_IMAGE.is_file() else None
            if _img_val is not None:
                gr.Image(
                    value=_img_val,
                    label="Training Curves (700 steps)",
                )
            else:
                gr.Markdown(
                    f"_Plot not found. Add `reward_curves.png` in `{_SC_DIR.as_posix()}` to show training curves._"
                )
            # Static summary table and links; the text starts at column 0
            # inside the raw string.
            gr.Markdown(
                r"""
| Metric | Value |
|--------|-------|
| Peak Episode Score | 114 (+77%) |
| Task Completion | 35 → 75 (+114%) |
| GRPO reward_std | 0.5 (variance confirmed) |
| Scenarios Completed | All 8 automatically |
| Backtracking | Triggered 2x (MARL adaptive) |
| Model | Qwen2.5-3B-Instruct 4-bit LoRA |

## Trained Model
🤖 [Anurag137/enterprise-ops-lora](https://huggingface.co/Anurag137/enterprise-ops-lora)

## Experiment Tracking
📊 [View on Weights & Biases](https://wandb.ai/kanhaiyakumar76618-indian-institute-of-information-techn/enterprise-ops-arena)

## Before vs After Training
**Before:** Agent outputs wrong tool names, missing ticket_id
**After:** Correct tool calls, SLA-aware reasoning, specific ticket references
                """
            )

    # --- Event wiring -------------------------------------------------------
    # Selecting a different tool replaces the params box with that tool's preset.
    tool_call.change(
        fn=_preset_tool_params,
        inputs=tool_call,
        outputs=tool_params,
    )
    # Reset fills both observation boxes and updates episode and model status.
    reset_button.click(
        fn=_reset_episode,
        inputs=[use_trained_model, scenario],
        outputs=[reset_observation, result_observation, episode_status, model_status],
    )
    # Step writes to the same boxes as reset, plus the reward score.
    step_button.click(
        fn=_step_episode,
        inputs=[
            use_trained_model,
            agent_id,
            tool_call,
            tool_params,
            message_to,
            message_content,
            reasoning,
        ],
        outputs=[reset_observation, result_observation, reward_score, episode_status, model_status],
    )
    # World state is fetched only when the button is pressed.
    state_button.click(fn=_load_world_state, inputs=None, outputs=world_state)


if __name__ == "__main__":
    # Preferred mode: uvicorn serves "app:app" on port 7860 so the /reset and
    # /step routes exist alongside the UI. Without uvicorn, only the Gradio UI
    # is launched.
    _server = Path(__file__).resolve().parent
    os.chdir(_server)
    # Make this directory and its parent importable. Each is inserted at the
    # front of sys.path only if missing, so the parent ends up first when
    # both are added.
    for _entry in (str(_server), str(_server.parent)):
        if _entry not in sys.path:
            sys.path.insert(0, _entry)
    try:
        import uvicorn
    except ImportError:
        print(
            "[gradio_app] uvicorn not installed; launching Gradio UI only. "
            "API routes (/reset, /step) will not work without running: uvicorn app:app",
            flush=True,
        )
        demo.launch(server_name="0.0.0.0", server_port=7860)
    else:
        uvicorn.run("app:app", host="0.0.0.0", port=7860, factory=False, reload=False)