# enterprise-ops-arena / gradio_app.py
# Commit b9b6fea (Anurag137): "deploy: trained LoRA toggle + training evidence tab"
# File size: 9.97 kB
from __future__ import annotations
import json
import os
import sys
from pathlib import Path
from typing import Any
import gradio as gr
import requests
# Address of the API that every helper in this module sends requests to.
BASE_URL = "http://localhost:7860"
# Timeout handed to ``requests`` for each call.
TIMEOUT = 45

# The reward plot is looked up next to this file.
_SC_DIR = Path(__file__).resolve().parent
_REWARD_IMAGE = _SC_DIR / "reward_curves.png"

# (label, value) pairs for the scenario dropdown:
# ("Scenario 1", "scenario_01") ... ("Scenario 8", "scenario_08").
SCENARIO_CHOICES = [
    (f"Scenario {index}", f"scenario_{index:02d}") for index in range(1, 9)
]

# (label, value) pairs for the agent dropdown.
AGENT_CHOICES = [
    ("IT Agent", "it_agent"),
    ("Manager Agent", "manager_agent"),
    ("Finance Agent", "finance_agent"),
    ("Oversight Agent", "oversight_agent"),
]

# (label, value) pairs for the tool dropdown.
TOOL_CHOICES = [
    ("Get Tickets", "get_tickets"),
    ("Resolve Ticket", "resolve_ticket"),
    ("Allocate Resource", "allocate_resource"),
    ("Approve Budget", "approve_budget"),
    ("Get Project Status", "get_project_status"),
]

# JSON text pre-filled into the "Tool params JSON" box for each tool value.
TOOL_PARAM_PRESETS = {
    "get_tickets": "{}",
    "resolve_ticket": '{\n "ticket_id": ""\n}',
    "allocate_resource": (
        '{\n'
        ' "resource_type": "engineers",\n'
        ' "amount": 1,\n'
        ' "requester_agent": ""\n'
        '}'
    ),
    "approve_budget": (
        '{\n'
        ' "amount": 1000,\n'
        ' "justification": "",\n'
        ' "requester_agent": "",\n'
        ' "manager_countersign": false\n'
        '}'
    ),
    "get_project_status": "{}",
}
def _pretty(data: Any) -> str:
    """Render *data* as JSON text suitable for a read-only textbox.

    Keys are sorted, nesting is indented by two spaces, and non-ASCII
    characters are written as-is instead of being escaped.
    """
    dump_options = {"sort_keys": True, "indent": 2, "ensure_ascii": False}
    return json.dumps(data, **dump_options)
def _request(method: str, path: str, payload: dict[str, Any] | None = None) -> dict[str, Any]:
    """Send one HTTP request to the API at ``BASE_URL`` and decode the reply.

    Args:
        method: HTTP verb handed to ``requests.request``.
        path: Path appended verbatim to ``BASE_URL``.
        payload: Optional JSON body; ``None`` sends no body.

    Returns:
        The response body parsed as JSON.

    Raises:
        requests.HTTPError: If the response carries an error status.
    """
    endpoint = f"{BASE_URL}{path}"
    reply = requests.request(method, endpoint, json=payload, timeout=TIMEOUT)
    # Surface 4xx/5xx responses as exceptions before trying to parse them.
    reply.raise_for_status()
    return reply.json()
def _default_status(use_trained: bool) -> str:
    """Return the fallback model-status text for the current toggle state.

    Used whenever the server reply does not carry its own status string.
    """
    if use_trained:
        return "Trained mode selected (applies on next server contact)"
    return "Rule-based agents active"
def _reset_episode(
    use_trained: bool,
    scenario_name: str,
) -> tuple[str, str, str, str]:
    """Start a new episode via ``POST /reset`` and build the UI outputs.

    Args:
        use_trained: State of the "use trained model" checkbox; forwarded to
            the server as ``use_trained_model``.
        scenario_name: Scenario value forwarded to the server as ``scenario``.

    Returns:
        A 4-tuple ``(observation, observation, episode_status, model_status)``.
        The formatted observation appears twice because it feeds both
        observation textboxes; the episode status is always ``"Active"``.

    Raises:
        requests.HTTPError: Propagated from ``_request`` on an error status.
    """
    data = _request(
        "post",
        "/reset",
        {"scenario": scenario_name, "use_trained_model": use_trained},
    )
    formatted = _pretty(data.get("observation", {}))
    # Fall back when the key is missing *or* present but empty/null, matching
    # the fallback used by _step_episode; a plain .get() default would let a
    # null status blank out the status box.
    status = data.get("it_agent_status") or _default_status(use_trained)
    return formatted, formatted, "Active", status
def _step_episode(
    use_trained: bool,
    agent_id: str,
    tool_call: str,
    tool_params_json: str,
    message_to: str,
    message_content: str,
    reasoning: str,
) -> tuple[str, str, str, str, str]:
    """Submit one agent action via ``POST /step`` and build the UI outputs.

    Args:
        use_trained: State of the "use trained model" checkbox; forwarded to
            the server as ``use_trained_model``.
        agent_id: Acting agent's identifier.
        tool_call: Tool value; an empty selection is sent as ``None``.
        tool_params_json: JSON text for the tool parameters. Blank (or
            ``None``) means "no parameters"; otherwise it must decode to a
            JSON object.
        message_to: Optional message recipient; empty is sent as ``None``.
        message_content: Optional message body; empty is sent as ``None``.
        reasoning: Optional free-text reasoning; empty is sent as ``None``.

    Returns:
        A 5-tuple ``(observation, observation, reward, episode_status,
        model_status)``. The observation appears twice because it feeds both
        observation textboxes. When the tool params are invalid, no request
        is made and both observation slots carry the error text instead.

    Raises:
        requests.HTTPError: Propagated from ``_request`` on an error status.
    """
    raw_params = (tool_params_json or "").strip()
    try:
        tool_params = json.loads(raw_params) if raw_params else {}
        if not isinstance(tool_params, dict):
            raise ValueError("Tool params must decode to a JSON object.")
    except ValueError as exc:
        # json.JSONDecodeError is a ValueError subclass, so this covers both
        # malformed JSON and the "not an object" check above.
        error_text = f"Invalid tool params JSON: {exc}"
        return error_text, error_text, "0.0", "Active", _default_status(use_trained)

    payload: dict[str, Any] = {
        "agent_id": agent_id,
        "use_trained_model": use_trained,
        "tool_call": tool_call or None,
        "tool_params": tool_params,
        "message_to": message_to or None,
        "message_content": message_content or None,
        "reasoning": reasoning or None,
    }
    data = _request("post", "/step", payload)

    formatted = _pretty(data.get("observation", {}))
    # A reply with ``"reward": null`` must not crash the float formatting.
    reward_value = data.get("reward")
    reward = f"{0.0 if reward_value is None else reward_value:.3f}"
    ep_status = "Done" if data.get("done", False) else "Active"
    status = (data.get("info") or {}).get("it_agent_status") or _default_status(use_trained)
    return formatted, formatted, reward, ep_status, status
def _load_world_state() -> str:
    """Fetch ``GET /state`` and return the reply as formatted JSON text."""
    return _pretty(_request("get", "/state"))
def _preset_tool_params(tool_call: str) -> str:
    """Return the JSON template for *tool_call*, or ``"{}"`` if none is registered."""
    if tool_call in TOOL_PARAM_PRESETS:
        return TOOL_PARAM_PRESETS[tool_call]
    return "{}"
# Top-level Gradio app: a model toggle/status row, an "Arena" tab for driving
# episodes, and a "Training Evidence" tab with static results.
with gr.Blocks(
    title="EnterpriseOps Arena - Meta PyTorch OpenEnv Hackathon",
    theme=gr.themes.Monochrome(),
) as demo:
    gr.Markdown(
        """
# EnterpriseOps Arena - Meta PyTorch OpenEnv Hackathon
**Themes:** OpenEnv Themes 1 and 3.1
**Bonus Prizes:** Fleet AI, Halluminate, Scale AI, Scaler AI Labs, Patronus AI
**Team Names:** Hackathon teams and contributors
"""
    )

    # --- Model selection row -------------------------------------------------
    with gr.Row():
        use_trained_model = gr.Checkbox(
            label="🤖 Use Trained LoRA Model (vs Rule-based)",
            value=False,
            info="Uses Qwen2.5-3B trained on 700 steps of GRPO",
        )
        model_status = gr.Textbox(
            label="Model Status",
            # Same text the handlers fall back to while the checkbox is off.
            value=_default_status(False),
            interactive=False,
        )

    with gr.Tabs():
        # --- Arena tab: reset / step / results / world state -----------------
        with gr.Tab("Arena"):
            with gr.Row():
                with gr.Column(scale=1):
                    gr.Markdown("## Reset Panel")
                    scenario = gr.Dropdown(
                        choices=SCENARIO_CHOICES,
                        value="scenario_01",
                        label="Scenario",
                    )
                    reset_button = gr.Button("Reset Episode", variant="primary")
                    reset_observation = gr.Textbox(
                        label="Observation",
                        lines=12,
                        interactive=False,
                    )
                with gr.Column(scale=1):
                    gr.Markdown("## Step Panel")
                    agent_id = gr.Dropdown(
                        choices=AGENT_CHOICES,
                        value="it_agent",
                        label="Agent",
                    )
                    tool_call = gr.Dropdown(
                        choices=TOOL_CHOICES,
                        value="get_tickets",
                        label="Tool",
                    )
                    tool_params = gr.Textbox(
                        label="Tool params JSON",
                        lines=8,
                        value=_preset_tool_params("get_tickets"),
                    )
                    message_to = gr.Textbox(label="Message To", placeholder="manager_agent")
                    message_content = gr.Textbox(label="Message Content", lines=3)
                    reasoning = gr.Textbox(label="Reasoning", lines=3)
                    step_button = gr.Button("Step Episode", variant="primary")
            with gr.Row():
                with gr.Column(scale=1):
                    gr.Markdown("## Results Panel")
                    result_observation = gr.Textbox(
                        label="Observation",
                        lines=12,
                        interactive=False,
                    )
                    reward_score = gr.Textbox(
                        label="Reward Score",
                        value="0.0",
                        interactive=False,
                    )
                    episode_status = gr.Textbox(
                        label="Episode Status",
                        value="Active",
                        interactive=False,
                    )
                with gr.Column(scale=1):
                    gr.Markdown("## World State")
                    state_button = gr.Button("Load World State", variant="secondary")
                    world_state = gr.Textbox(label="State", lines=20, interactive=False)

        # --- Training Evidence tab: static plot, metrics table and links -----
        with gr.Tab("Training Evidence"):
            gr.Markdown(
                """
## Real GRPO Training Results
700 steps across 3 runs on Tesla T4 GPU
"""
            )
            # Show the plot only when the image file exists next to this
            # module; otherwise show a hint on where to put it.
            _img_val = str(_REWARD_IMAGE) if _REWARD_IMAGE.is_file() else None
            if _img_val is not None:
                gr.Image(
                    value=_img_val,
                    label="Training Curves (700 steps)",
                )
            else:
                gr.Markdown(
                    f"_Plot not found. Add `reward_curves.png` in `{_SC_DIR.as_posix()}` to show training curves._"
                )
            gr.Markdown(
                r"""
| Metric | Value |
|--------|-------|
| Peak Episode Score | 114 (+77%) |
| Task Completion | 35 → 75 (+114%) |
| GRPO reward_std | 0.5 (variance confirmed) |
| Scenarios Completed | All 8 automatically |
| Backtracking | Triggered 2x (MARL adaptive) |
| Model | Qwen2.5-3B-Instruct 4-bit LoRA |
## Trained Model
🤖 [Anurag137/enterprise-ops-lora](https://huggingface.co/Anurag137/enterprise-ops-lora)
## Experiment Tracking
📊 [View on Weights & Biases](https://wandb.ai/kanhaiyakumar76618-indian-institute-of-information-techn/enterprise-ops-arena)
## Before vs After Training
**Before:** Agent outputs wrong tool names, missing ticket_id
**After:** Correct tool calls, SLA-aware reasoning, specific ticket references
"""
            )

    # --- Event wiring --------------------------------------------------------
    # Picking a tool replaces the params box with that tool's JSON template.
    tool_call.change(
        fn=_preset_tool_params,
        inputs=tool_call,
        outputs=tool_params,
    )
    # Reset and step both refresh the two observation boxes together.
    reset_button.click(
        fn=_reset_episode,
        inputs=[use_trained_model, scenario],
        outputs=[reset_observation, result_observation, episode_status, model_status],
    )
    step_button.click(
        fn=_step_episode,
        inputs=[
            use_trained_model,
            agent_id,
            tool_call,
            tool_params,
            message_to,
            message_content,
            reasoning,
        ],
        outputs=[
            reset_observation,
            result_observation,
            reward_score,
            episode_status,
            model_status,
        ],
    )
    state_button.click(fn=_load_world_state, inputs=None, outputs=world_state)
if __name__ == "__main__":
    # Serve FastAPI + Gradio (single process) so /reset and /step work. Requires uvicorn.
    _server = Path(__file__).resolve().parent
    os.chdir(_server)
    # Put this directory and its parent on sys.path so "app:app" can be
    # imported. Each entry is inserted at the front, so the parent (inserted
    # last) ends up ahead of this directory.
    for _candidate in (str(_server), str(_server.parent)):
        if _candidate not in sys.path:
            sys.path.insert(0, _candidate)
    try:
        import uvicorn
    except ImportError:
        # Without uvicorn only the UI can be served; the API routes that the
        # UI calls will be unavailable.
        print("[gradio_app] uvicorn not installed; launching Gradio UI only. API routes (/reset, /step) will not work without running: uvicorn app:app", flush=True)
        demo.launch(server_name="0.0.0.0", server_port=7860)
    else:
        uvicorn.run("app:app", host="0.0.0.0", port=7860, factory=False, reload=False)