Spaces:

Anurag137
/

enterprise-ops-arena

Running

App Files Files Community

enterprise-ops-arena / gradio_app.py

Anurag137

deploy: trained LoRA toggle + training evidence tab

b9b6fea about 2 months ago

raw

history blame

9.97 kB

	from __future__ import annotations

	import json
	import os
	import sys
	from pathlib import Path
	from typing import Any

	import gradio as gr
	import requests

	# Root URL of the backend API; request paths such as "/reset" are appended to it.
	BASE_URL = "http://localhost:7860"
	# Value handed to `requests` as the per-request timeout.
	TIMEOUT = 45

	# Directory holding this script (resolved to an absolute path).
	_SC_DIR = Path(__file__).resolve().parents[0]
	# Where the training-curve plot is looked up when building the evidence tab.
	_REWARD_IMAGE = _SC_DIR.joinpath("reward_curves.png")

	# (label, value) pairs for the scenario dropdown: "Scenario N" -> "scenario_0N".
	SCENARIO_CHOICES = [
	    (f"Scenario {number}", f"scenario_{number:02d}") for number in range(1, 9)
	]

	# (label, value) pairs for the agent dropdown, kept in display order.
	AGENT_CHOICES = list(
	    {
	        "IT Agent": "it_agent",
	        "Manager Agent": "manager_agent",
	        "Finance Agent": "finance_agent",
	        "Oversight Agent": "oversight_agent",
	    }.items()
	)

	# (label, value) pairs for the tool dropdown; each label is the title-cased
	# tool identifier with underscores replaced by spaces.
	TOOL_CHOICES = [
	    (tool_name.replace("_", " ").title(), tool_name)
	    for tool_name in (
	        "get_tickets",
	        "resolve_ticket",
	        "allocate_resource",
	        "approve_budget",
	        "get_project_status",
	    )
	]

	# Starter JSON shown in the "Tool params JSON" box for each tool identifier.
	# Adjacent string literals are concatenated by Python, one JSON line per literal.
	TOOL_PARAM_PRESETS = dict(
	    get_tickets="{}",
	    resolve_ticket=(
	        '{\n'
	        ' "ticket_id": ""\n'
	        '}'
	    ),
	    allocate_resource=(
	        '{\n'
	        ' "resource_type": "engineers",\n'
	        ' "amount": 1,\n'
	        ' "requester_agent": ""\n'
	        '}'
	    ),
	    approve_budget=(
	        '{\n'
	        ' "amount": 1000,\n'
	        ' "justification": "",\n'
	        ' "requester_agent": "",\n'
	        ' "manager_countersign": false\n'
	        '}'
	    ),
	    get_project_status="{}",
	)


	def _pretty(data: Any) -> str:
	    """Render *data* as JSON text: 2-space indent, sorted keys, non-ASCII kept as-is."""
	    dump_options = {"indent": 2, "ensure_ascii": False, "sort_keys": True}
	    return json.dumps(data, **dump_options)


	def _request(method: str, path: str, payload: dict[str, Any] | None = None) -> dict[str, Any]:
	    """Send one HTTP request to the backend and return the decoded JSON body.

	    Args:
	        method: HTTP verb handed to ``requests.request`` (e.g. ``"get"``, ``"post"``).
	        path: Path appended verbatim to ``BASE_URL`` (e.g. ``"/reset"``).
	        payload: Optional body sent as JSON; ``None`` sends no JSON body.

	    Returns:
	        Whatever ``response.json()`` decodes from the response body.

	    Raises:
	        requests.HTTPError: If the server answers with a 4xx/5xx status
	            (raised by ``raise_for_status``).
	    """
	    # The union must be spelled with a bare `|`; an escaped `\|` is a syntax error.
	    response = requests.request(
	        method=method,
	        url=f"{BASE_URL}{path}",
	        json=payload,
	        timeout=TIMEOUT,
	    )
	    response.raise_for_status()
	    return response.json()


	def _default_status(use_trained: bool) -> str:
	    """Return the fallback model-status text for the given toggle state."""
	    if use_trained:
	        return "Trained mode selected (applies on next server contact)"
	    return "Rule-based agents active"


	def _reset_episode(
	    use_trained: bool,
	    scenario_name: str,
	) -> tuple[str, str, str, str]:
	    """POST ``/reset`` for the chosen scenario and build the UI outputs.

	    Args:
	        use_trained: State of the trained-model checkbox; forwarded to the
	            server as ``use_trained_model``.
	        scenario_name: Scenario identifier forwarded as ``scenario``.

	    Returns:
	        ``(observation, observation, "Active", model_status)``. The
	        pretty-printed observation is returned twice because it feeds two
	        separate output textboxes.

	    Raises:
	        Whatever ``_request`` raises on a failed call.
	    """
	    data = _request(
	        "post",
	        "/reset",
	        {"scenario": scenario_name, "use_trained_model": use_trained},
	    )
	    formatted = _pretty(data.get("observation", {}))
	    # Accept the status at the top level or under "info" (where `/step`
	    # responses are read from). Fall back on any falsy value so that a
	    # null/empty status from the server never blanks the status box.
	    status = (
	        data.get("it_agent_status")
	        or (data.get("info") or {}).get("it_agent_status")
	        or _default_status(use_trained)
	    )
	    return formatted, formatted, "Active", status


	def _step_episode(
	    use_trained: bool,
	    agent_id: str,
	    tool_call: str,
	    tool_params_json: str,
	    message_to: str,
	    message_content: str,
	    reasoning: str,
	) -> tuple[str, str, str, str, str]:
	    """POST one agent action to ``/step`` and build the UI outputs.

	    Args:
	        use_trained: State of the trained-model checkbox; forwarded as
	            ``use_trained_model``.
	        agent_id: Identifier of the acting agent.
	        tool_call: Tool identifier; an empty value is sent as ``None``.
	        tool_params_json: JSON text that must decode to an object; blank
	            (or ``None``) means "no parameters".
	        message_to: Optional message recipient; empty is sent as ``None``.
	        message_content: Optional message body; empty is sent as ``None``.
	        reasoning: Optional free-text reasoning; empty is sent as ``None``.

	    Returns:
	        ``(observation, observation, reward, episode_status, model_status)``.
	        When the tool params are not a valid JSON object, no request is made
	        and the error text takes the place of the observation.

	    Raises:
	        Whatever ``_request`` raises on a failed call.
	    """
	    try:
	        # Treat None like an empty box instead of failing on `.strip()`.
	        raw_params = (tool_params_json or "").strip()
	        tool_params = json.loads(raw_params) if raw_params else {}
	        if not isinstance(tool_params, dict):
	            raise ValueError("Tool params must decode to a JSON object.")
	    except (TypeError, ValueError) as exc:
	        # json.JSONDecodeError is a ValueError; anything else is a real bug
	        # and should not be reported as a JSON problem.
	        error_text = f"Invalid tool params JSON: {exc}"
	        return error_text, error_text, "0.0", "Active", _default_status(use_trained)

	    payload: dict[str, Any] = {
	        "agent_id": agent_id,
	        "use_trained_model": use_trained,
	        "tool_call": tool_call or None,
	        "tool_params": tool_params,
	        "message_to": message_to or None,
	        "message_content": message_content or None,
	        "reasoning": reasoning or None,
	    }
	    data = _request("post", "/step", payload)
	    formatted = _pretty(data.get("observation", {}))

	    # A null or non-numeric reward must not crash the format spec.
	    raw_reward = data.get("reward")
	    try:
	        reward = f"{float(0.0 if raw_reward is None else raw_reward):.3f}"
	    except (TypeError, ValueError):
	        reward = "0.000"

	    ep_status = "Done" if data.get("done", False) else "Active"
	    status = (data.get("info") or {}).get("it_agent_status") or _default_status(use_trained)
	    return formatted, formatted, reward, ep_status, status


	def _load_world_state() -> str:
	    """GET ``/state`` from the backend and return it pretty-printed as JSON text."""
	    return _pretty(_request("get", "/state"))


	def _preset_tool_params(tool_call: str) -> str:
	    """Return the starter params JSON for *tool_call*, or ``"{}"`` if none is registered."""
	    try:
	        return TOOL_PARAM_PRESETS[tool_call]
	    except KeyError:
	        return "{}"


	# Build the whole Gradio UI: a header, the trained-model toggle with its status
	# box, an "Arena" tab (reset / step / results / world state) and a
	# "Training Evidence" tab, followed by the event wiring for the controls.
	with gr.Blocks(theme=gr.themes.Monochrome(), title="EnterpriseOps Arena - Meta PyTorch OpenEnv Hackathon") as demo:
	    gr.Markdown(
	        """
	        # EnterpriseOps Arena - Meta PyTorch OpenEnv Hackathon

	        Themes: OpenEnv Themes 1 and 3.1
	        Bonus Prizes: Fleet AI, Halluminate, Scale AI, Scaler AI Labs, Patronus AI
	        Team Names: Hackathon teams and contributors
	        """
	    )

	    with gr.Row():
	        use_trained_model = gr.Checkbox(
	            label="🤖 Use Trained LoRA Model (vs Rule-based)",
	            value=False,
	            info="Uses Qwen2.5-3B trained on 700 steps of GRPO",
	        )
	        model_status = gr.Textbox(
	            label="Model Status",
	            value="Rule-based agents active",
	            interactive=False,
	        )

	    with gr.Tabs():
	        with gr.Tab("Arena"):
	            with gr.Row():
	                with gr.Column(scale=1):
	                    gr.Markdown("## Reset Panel")
	                    scenario = gr.Dropdown(
	                        choices=SCENARIO_CHOICES,
	                        value="scenario_01",
	                        label="Scenario",
	                    )
	                    reset_button = gr.Button("Reset Episode", variant="primary")
	                    reset_observation = gr.Textbox(label="Observation", lines=12, interactive=False)

	                with gr.Column(scale=1):
	                    gr.Markdown("## Step Panel")
	                    agent_id = gr.Dropdown(
	                        choices=AGENT_CHOICES,
	                        value="it_agent",
	                        label="Agent",
	                    )
	                    tool_call = gr.Dropdown(
	                        choices=TOOL_CHOICES,
	                        value="get_tickets",
	                        label="Tool",
	                    )
	                    tool_params = gr.Textbox(
	                        label="Tool params JSON",
	                        lines=8,
	                        value=_preset_tool_params("get_tickets"),
	                    )
	                    message_to = gr.Textbox(label="Message To", placeholder="manager_agent")
	                    message_content = gr.Textbox(label="Message Content", lines=3)
	                    reasoning = gr.Textbox(label="Reasoning", lines=3)
	                    step_button = gr.Button("Step Episode", variant="primary")

	            with gr.Row():
	                with gr.Column(scale=1):
	                    gr.Markdown("## Results Panel")
	                    result_observation = gr.Textbox(label="Observation", lines=12, interactive=False)
	                    reward_score = gr.Textbox(label="Reward Score", value="0.0", interactive=False)
	                    episode_status = gr.Textbox(label="Episode Status", value="Active", interactive=False)

	                with gr.Column(scale=1):
	                    gr.Markdown("## World State")
	                    state_button = gr.Button("Load World State", variant="secondary")
	                    world_state = gr.Textbox(label="State", lines=20, interactive=False)

	        with gr.Tab("Training Evidence"):
	            gr.Markdown(
	                """
	                ## Real GRPO Training Results
	                700 steps across 3 runs on Tesla T4 GPU
	                """
	            )
	            # Show the plot only if the image file exists next to this script;
	            # otherwise explain where to put it.
	            _img_val = str(_REWARD_IMAGE) if _REWARD_IMAGE.is_file() else None
	            if _img_val is not None:
	                gr.Image(
	                    value=_img_val,
	                    label="Training Curves (700 steps)",
	                )
	            else:
	                gr.Markdown(
	                    f"_Plot not found. Add `reward_curves.png` in `{_SC_DIR.as_posix()}` to show training curves._"
	                )
	            # Table rows use bare pipes: an escaped pipe is literal text in
	            # Markdown, so escaped delimiters would not render as a table.
	            gr.Markdown(
	                r"""
	                | Metric | Value |
	                |--------|-------|
	                | Peak Episode Score | 114 (+77%) |
	                | Task Completion | 35 → 75 (+114%) |
	                | GRPO reward_std | 0.5 (variance confirmed) |
	                | Scenarios Completed | All 8 automatically |
	                | Backtracking | Triggered 2x (MARL adaptive) |
	                | Model | Qwen2.5-3B-Instruct 4-bit LoRA |

	                ## Trained Model
	                🤖 [Anurag137/enterprise-ops-lora](https://huggingface.co/Anurag137/enterprise-ops-lora)

	                ## Experiment Tracking
	                📊 [View on Weights & Biases](https://wandb.ai/kanhaiyakumar76618-indian-institute-of-information-techn/enterprise-ops-arena)

	                ## Before vs After Training
	                Before: Agent outputs wrong tool names, missing ticket_id
	                After: Correct tool calls, SLA-aware reasoning, specific ticket references
	                """
	            )

	    # Picking a tool replaces the params box with that tool's preset JSON.
	    tool_call.change(
	        fn=_preset_tool_params,
	        inputs=tool_call,
	        outputs=tool_params,
	    )
	    # Reset fills both observation boxes plus the episode and model status.
	    reset_button.click(
	        fn=_reset_episode,
	        inputs=[use_trained_model, scenario],
	        outputs=[reset_observation, result_observation, episode_status, model_status],
	    )
	    # Step sends the form contents and refreshes observation, reward and status.
	    step_button.click(
	        fn=_step_episode,
	        inputs=[
	            use_trained_model,
	            agent_id,
	            tool_call,
	            tool_params,
	            message_to,
	            message_content,
	            reasoning,
	        ],
	        outputs=[reset_observation, result_observation, reward_score, episode_status, model_status],
	    )
	    state_button.click(fn=_load_world_state, inputs=None, outputs=world_state)


	if __name__ == "__main__":
	    # Serve FastAPI + Gradio (single process) so /reset and /step work. Requires uvicorn.
	    _server = Path(__file__).resolve().parent
	    os.chdir(_server)
	    # Put the script directory, then its parent, at the front of sys.path
	    # (the parent is inserted last, so it ends up first) unless already listed.
	    for _location in (_server, _server.parent):
	        _entry = str(_location)
	        if _entry not in sys.path:
	            sys.path.insert(0, _entry)
	    try:
	        import uvicorn
	    except ImportError:
	        # Without uvicorn only the UI can be started; the API routes stay unavailable.
	        print(
	            "[gradio_app] uvicorn not installed; launching Gradio UI only. "
	            "API routes (/reset, /step) will not work without running: uvicorn app:app",
	            flush=True,
	        )
	        demo.launch(server_name="0.0.0.0", server_port=7860)
	    else:
	        uvicorn.run("app:app", host="0.0.0.0", port=7860, factory=False, reload=False)