Spaces:
Running
Running
| from __future__ import annotations | |
| import json | |
| import os | |
| import sys | |
| from pathlib import Path | |
| from typing import Any | |
| import gradio as gr | |
| import requests | |
# Base address of the environment API that every UI callback talks to.
BASE_URL = "http://localhost:7860"
# Per-request timeout, passed straight to ``requests`` (seconds).
TIMEOUT = 45

# Directory containing this script; the training plot is looked up beside it.
_SC_DIR = Path(__file__).resolve().parent
_REWARD_IMAGE = _SC_DIR.joinpath("reward_curves.png")

# Dropdown entries are (label shown in the UI, value sent to the API) pairs.
SCENARIO_CHOICES = [
    (f"Scenario {number}", f"scenario_{number:02d}") for number in range(1, 9)
]

AGENT_CHOICES = [
    ("IT Agent", "it_agent"),
    ("Manager Agent", "manager_agent"),
    ("Finance Agent", "finance_agent"),
    ("Oversight Agent", "oversight_agent"),
]

TOOL_CHOICES = [
    ("Get Tickets", "get_tickets"),
    ("Resolve Ticket", "resolve_ticket"),
    ("Allocate Resource", "allocate_resource"),
    ("Approve Budget", "approve_budget"),
    ("Get Project Status", "get_project_status"),
]

# JSON skeletons used to pre-fill the "Tool params JSON" textbox, keyed by
# tool id. Tools that take no arguments map to an empty object.
TOOL_PARAM_PRESETS = {
    "get_tickets": "{}",
    "resolve_ticket": '{\n "ticket_id": ""\n}',
    "allocate_resource": (
        '{\n "resource_type": "engineers",\n "amount": 1,\n "requester_agent": ""\n}'
    ),
    "approve_budget": (
        '{\n "amount": 1000,\n "justification": "",\n "requester_agent": "",'
        '\n "manager_countersign": false\n}'
    ),
    "get_project_status": "{}",
}
| def _pretty(data: Any) -> str: | |
| return json.dumps(data, indent=2, ensure_ascii=False, sort_keys=True) | |
def _request(method: str, path: str, payload: dict[str, Any] | None = None) -> dict[str, Any]:
    """Send one HTTP request to the environment API and return the decoded body.

    Args:
        method: HTTP verb handed to ``requests.request`` (e.g. ``"get"``).
        path: Route appended verbatim to ``BASE_URL`` (e.g. ``"/reset"``).
        payload: Optional JSON body; ``None`` sends no body.

    Returns:
        Whatever ``response.json()`` decodes. Callers treat it as a dict.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    target_url = f"{BASE_URL}{path}"
    reply = requests.request(method=method, url=target_url, json=payload, timeout=TIMEOUT)
    # Surface 4xx/5xx responses as exceptions instead of decoding an error page.
    reply.raise_for_status()
    return reply.json()
| def _default_status(use_trained: bool) -> str: | |
| return ( | |
| "Trained mode selected (applies on next server contact)" | |
| if use_trained | |
| else "Rule-based agents active" | |
| ) | |
def _reset_episode(
    use_trained: bool,
    scenario_name: str,
) -> tuple[str, str, str, str]:
    """Start a new episode by POSTing to ``/reset`` and format the reply.

    Args:
        use_trained: Forwarded as ``use_trained_model`` in the request body.
        scenario_name: Forwarded as ``scenario`` in the request body.

    Returns:
        ``(observation, observation, "Active", status)``: the pretty-printed
        observation twice (the click handler routes it to both observation
        boxes), the episode status, and the model status text.
    """
    request_body = {"scenario": scenario_name, "use_trained_model": use_trained}
    reply = _request("post", "/reset", request_body)

    observation_text = _pretty(reply.get("observation", {}))

    # NOTE(review): the status is read from the top level of the reply here,
    # whereas the step handler reads it from the "info" object. Confirm against
    # the server's /reset schema.
    fallback_status = _default_status(use_trained)
    model_state = reply.get("it_agent_status", fallback_status)

    return observation_text, observation_text, "Active", model_state
def _parse_tool_params(raw_text: str | None) -> dict[str, Any]:
    """Decode the tool-params textbox into a dict; blank input means ``{}``.

    Raises:
        ValueError: If the text is not valid JSON (``json.JSONDecodeError`` is
            a ``ValueError``) or decodes to something other than an object.
    """
    text = (raw_text or "").strip()
    if not text:
        return {}
    parsed = json.loads(text)
    if not isinstance(parsed, dict):
        raise ValueError("Tool params must decode to a JSON object.")
    return parsed


def _format_reward(raw_reward: Any) -> str:
    """Render a reward with three decimals, treating missing/invalid as 0."""
    try:
        return f"{float(raw_reward):.3f}"
    except (TypeError, ValueError):
        # A null or non-numeric reward must not crash the UI callback.
        return "0.000"


def _step_episode(
    use_trained: bool,
    agent_id: str,
    tool_call: str,
    tool_params_json: str,
    message_to: str,
    message_content: str,
    reasoning: str,
) -> tuple[str, str, str, str, str]:
    """Submit one agent action to ``/step`` and format the reply for the UI.

    Args:
        use_trained: Forwarded as ``use_trained_model`` in the request body.
        agent_id: Id of the acting agent.
        tool_call: Tool id; an empty value is sent as ``null``.
        tool_params_json: JSON object text with the tool arguments. Blank
            (or ``None``) is treated as an empty object.
        message_to: Optional message recipient; empty is sent as ``null``.
        message_content: Optional message body; empty is sent as ``null``.
        reasoning: Optional free-text rationale; empty is sent as ``null``.

    Returns:
        ``(observation, observation, reward, episode_status, model_status)``.
        When the params text is invalid, no request is made and both
        observation slots carry the error message instead.

    Raises:
        requests.HTTPError: Propagated from ``_request`` on an error status.
    """
    try:
        tool_params = _parse_tool_params(tool_params_json)
    except (ValueError, RecursionError) as exc:
        error_text = f"Invalid tool params JSON: {exc}"
        return error_text, error_text, "0.0", "Active", _default_status(use_trained)

    # Empty optional fields are normalized to None so they serialize as null.
    payload: dict[str, Any] = {
        "agent_id": agent_id,
        "use_trained_model": use_trained,
        "tool_call": tool_call or None,
        "tool_params": tool_params,
        "message_to": message_to or None,
        "message_content": message_content or None,
        "reasoning": reasoning or None,
    }
    data = _request("post", "/step", payload)

    formatted = _pretty(data.get("observation", {}))
    reward = _format_reward(data.get("reward"))
    ep_status = "Done" if data.get("done", False) else "Active"
    # "info" may be absent or null; fall back to the local default status.
    status = (data.get("info") or {}).get("it_agent_status") or _default_status(use_trained)
    return formatted, formatted, reward, ep_status, status
def _load_world_state() -> str:
    """Fetch ``/state`` from the API and return it pretty-printed."""
    return _pretty(_request("get", "/state"))
def _preset_tool_params(tool_call: str) -> str:
    """Return the JSON skeleton for *tool_call*, or ``"{}"`` if none is registered."""
    try:
        return TOOL_PARAM_PRESETS[tool_call]
    except KeyError:
        return "{}"
# Gradio UI definition. Components are declared first, then wired to the
# callback functions at the bottom of the block (listeners must be registered
# inside the Blocks context).
with gr.Blocks(theme=gr.themes.Monochrome(), title="EnterpriseOps Arena - Meta PyTorch OpenEnv Hackathon") as demo:
    gr.Markdown(
        """
# EnterpriseOps Arena - Meta PyTorch OpenEnv Hackathon
**Themes:** OpenEnv Themes 1 and 3.1
**Bonus Prizes:** Fleet AI, Halluminate, Scale AI, Scaler AI Labs, Patronus AI
**Team Names:** Hackathon teams and contributors
"""
    )

    # Global controls: model toggle plus a read-only status line.
    with gr.Row():
        use_trained_model = gr.Checkbox(
            label="🤖 Use Trained LoRA Model (vs Rule-based)",
            value=False,
            info="Uses Qwen2.5-3B trained on 700 steps of GRPO",
        )
        model_status = gr.Textbox(
            label="Model Status",
            # Derived from the same helper the callbacks use, so the initial
            # text cannot drift from the fallback text.
            value=_default_status(False),
            interactive=False,
        )

    with gr.Tabs():
        with gr.Tab("Arena"):
            # Top row: episode reset (left) and action submission (right).
            with gr.Row():
                with gr.Column(scale=1):
                    gr.Markdown("## Reset Panel")
                    scenario = gr.Dropdown(
                        choices=SCENARIO_CHOICES,
                        value="scenario_01",
                        label="Scenario",
                    )
                    reset_button = gr.Button("Reset Episode", variant="primary")
                    reset_observation = gr.Textbox(label="Observation", lines=12, interactive=False)
                with gr.Column(scale=1):
                    gr.Markdown("## Step Panel")
                    agent_id = gr.Dropdown(
                        choices=AGENT_CHOICES,
                        value="it_agent",
                        label="Agent",
                    )
                    tool_call = gr.Dropdown(
                        choices=TOOL_CHOICES,
                        value="get_tickets",
                        label="Tool",
                    )
                    tool_params = gr.Textbox(
                        label="Tool params JSON",
                        lines=8,
                        value=_preset_tool_params("get_tickets"),
                    )
                    message_to = gr.Textbox(label="Message To", placeholder="manager_agent")
                    message_content = gr.Textbox(label="Message Content", lines=3)
                    reasoning = gr.Textbox(label="Reasoning", lines=3)
                    step_button = gr.Button("Step Episode", variant="primary")

            # Bottom row: latest step result (left) and raw world state (right).
            with gr.Row():
                with gr.Column(scale=1):
                    gr.Markdown("## Results Panel")
                    result_observation = gr.Textbox(label="Observation", lines=12, interactive=False)
                    reward_score = gr.Textbox(label="Reward Score", value="0.0", interactive=False)
                    episode_status = gr.Textbox(label="Episode Status", value="Active", interactive=False)
                with gr.Column(scale=1):
                    gr.Markdown("## World State")
                    state_button = gr.Button("Load World State", variant="secondary")
                    world_state = gr.Textbox(label="State", lines=20, interactive=False)

        with gr.Tab("Training Evidence"):
            gr.Markdown(
                """
## Real GRPO Training Results
700 steps across 3 runs on Tesla T4 GPU
"""
            )
            # Show the plot only if the image file sits next to this script;
            # otherwise explain where to put it.
            _img_val = str(_REWARD_IMAGE) if _REWARD_IMAGE.is_file() else None
            if _img_val is not None:
                gr.Image(
                    value=_img_val,
                    label="Training Curves (700 steps)",
                )
            else:
                gr.Markdown(
                    f"_Plot not found. Add `reward_curves.png` in `{_SC_DIR.as_posix()}` to show training curves._"
                )
            gr.Markdown(
                r"""
| Metric | Value |
|--------|-------|
| Peak Episode Score | 114 (+77%) |
| Task Completion | 35 → 75 (+114%) |
| GRPO reward_std | 0.5 (variance confirmed) |
| Scenarios Completed | All 8 automatically |
| Backtracking | Triggered 2x (MARL adaptive) |
| Model | Qwen2.5-3B-Instruct 4-bit LoRA |
## Trained Model
🤖 [Anurag137/enterprise-ops-lora](https://huggingface.co/Anurag137/enterprise-ops-lora)
## Experiment Tracking
📊 [View on Weights & Biases](https://wandb.ai/kanhaiyakumar76618-indian-institute-of-information-techn/enterprise-ops-arena)
## Before vs After Training
**Before:** Agent outputs wrong tool names, missing ticket_id
**After:** Correct tool calls, SLA-aware reasoning, specific ticket references
"""
            )

    # ---- Event wiring -----------------------------------------------------

    # Reflect the checkbox in the status line immediately; without this the
    # status only changes after the next reset/step round-trip.
    use_trained_model.change(
        fn=_default_status,
        inputs=use_trained_model,
        outputs=model_status,
    )

    # Picking a tool replaces the params textbox with that tool's skeleton.
    tool_call.change(
        fn=_preset_tool_params,
        inputs=tool_call,
        outputs=tool_params,
    )

    reset_button.click(
        fn=_reset_episode,
        inputs=[use_trained_model, scenario],
        outputs=[reset_observation, result_observation, episode_status, model_status],
    )

    step_button.click(
        fn=_step_episode,
        inputs=[
            use_trained_model,
            agent_id,
            tool_call,
            tool_params,
            message_to,
            message_content,
            reasoning,
        ],
        outputs=[reset_observation, result_observation, reward_score, episode_status, model_status],
    )

    state_button.click(fn=_load_world_state, inputs=None, outputs=world_state)
if __name__ == "__main__":
    # Serve FastAPI + Gradio (single process) so /reset and /step work.
    # Requires uvicorn.
    _server = Path(__file__).resolve().parent
    os.chdir(_server)

    # Put this directory and its parent at the front of sys.path. The parent
    # is handled second, so it ends up ahead of the script directory.
    for _entry in (str(_server), str(_server.parent)):
        if _entry not in sys.path:
            sys.path.insert(0, _entry)

    try:
        import uvicorn
    except ImportError:
        # Degraded mode: the UI comes up, but its callbacks have no API to reach.
        print("[gradio_app] uvicorn not installed; launching Gradio UI only. API routes (/reset, /step) will not work without running: uvicorn app:app", flush=True)
        demo.launch(server_name="0.0.0.0", server_port=7860)
    else:
        # "app:app" is resolved by uvicorn from the working directory set above.
        uvicorn.run("app:app", host="0.0.0.0", port=7860, factory=False, reload=False)