"""
llm_engine.py — Hugging Face router chat with Ollama + local rules fallback.
Primary path: Hugging Face router chat completions.
Fallback 1: optional external Ollama endpoint.
Fallback 2: deterministic local receptionist rules.
"""
from __future__ import annotations

import logging
from typing import Any

import httpx

from config import Settings
import ollama_engine

logger = logging.getLogger(__name__)

# Router models tried in order after any configured override; the ":cerebras"
# suffix asks the Hugging Face router to serve the model via the Cerebras provider.
DEFAULT_CHAT_FALLBACKS = [
    "meta-llama/Llama-3.1-8B-Instruct:cerebras",
    "openai/gpt-oss-120b:cerebras",
    "Qwen/Qwen3-4B-Thinking-2507:cerebras",
    "Qwen/Qwen2.5-7B-Instruct-1M:cerebras",
    "deepseek-ai/DeepSeek-R1:cerebras",
]


def candidate_models(settings: Settings) -> list[str]:
    models: list[str] = []
    if settings.hf_chat_model:
        models.append(settings.hf_chat_model)
    for model in DEFAULT_CHAT_FALLBACKS:
        if model not in models:
            models.append(model)
    return models
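
# Illustrative sketch only (not executed): assuming a Settings object whose
# hf_chat_model is "my-org/custom-model", the override is tried first and the
# defaults follow, deduplicated:
#
#   candidate_models(settings)
#   # -> ["my-org/custom-model",
#   #     "meta-llama/Llama-3.1-8B-Instruct:cerebras",
#   #     "openai/gpt-oss-120b:cerebras",
#   #     ...]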


def _headers(settings: Settings) -> dict[str, str]:
    return {
        "Authorization": f"Bearer {settings.hf_token}",
        "Content-Type": "application/json",
    }


def _short_error(resp: httpx.Response) -> str:
    try:
        return str(resp.json())[:500]
    except Exception:
        return resp.text[:500]


def _last_user_text(messages: list[dict[str, Any]]) -> str:
    for message in reversed(messages):
        if message.get("role") == "user":
            return str(message.get("content", "")).strip()
    return ""


def local_receptionist_reply(messages: list[dict[str, Any]], business_name: str = "the business") -> str:
    """Small deterministic fallback for beta phone-call continuity."""
    user_text = _last_user_text(messages)
    if not user_text:
        return "Hi, this is LINGO, your AI assistant. How can I help you today?"
    lower = user_text.lower()
    if "lingo_ok" in lower:
        return "LINGO_OK"
    if any(word in lower for word in ["human", "operator", "real person", "representative", "someone"]):
        return "Of course — let me connect you with a team member right away. Please hold for just a moment.\n[ESCALATE]"
    if any(word in lower for word in ["appointment", "book", "booking", "schedule", "reservation"]):
        return (
            "I can help take an appointment request. Please tell me your full name, preferred date and time, "
            "and the reason for the appointment. Someone from the team will confirm it shortly."
        )
    if any(word in lower for word in ["message", "call me", "call back", "voicemail"]):
        return (
            "I can take a message. Please share your name, the best callback number, "
            "and what you would like the team to know."
        )
    if any(word in lower for word in ["hours", "open", "close", "address", "price", "cost"]):
        return (
            f"I do not have that specific detail for {business_name} on hand yet. "
            "I can take a message and have someone follow up with you."
        )
    return (
        f"Thanks for calling {business_name}. I can answer basic questions, take a message, "
        "or collect an appointment request. How can I help?"
    )
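
# Hedged examples of the rules above; the inputs are invented for illustration
# and the outputs follow directly from the branches:
#
#   local_receptionist_reply([{"role": "user", "content": "Can I book a table?"}])
#   # -> the appointment-request prompt ("book" matches the booking branch)
#   local_receptionist_reply([{"role": "user", "content": "Get me a real person"}])
#   # -> the hold-for-a-team-member reply ending in "[ESCALATE]"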


async def chat_completion(
    messages: list[dict[str, Any]],
    http_client: httpx.AsyncClient,
    settings: Settings,
    max_tokens: int = 256,
    temperature: float = 0.5,
    allow_local_fallback: bool = True,
) -> dict[str, Any]:
    """Try HF chat models in order, then Ollama, then local beta rules if needed."""
    errors: list[dict[str, Any]] = []
    for model in candidate_models(settings):
        payload = {
            "model": model,
            "messages": messages,
            "max_tokens": max_tokens,
            "temperature": temperature,
        }
        try:
            resp = await http_client.post(
                settings.hf_chat_url,
                json=payload,
                headers=_headers(settings),
                timeout=60.0,
            )
            if not resp.is_success:
                errors.append({"model": model, "status_code": resp.status_code, "error": _short_error(resp)})
                continue
            data = resp.json()
            content = data["choices"][0]["message"]["content"]
            return {
                "ok": True,
                "source": "hf_provider",
                "provider_chat_ok": True,
                "ollama_ok": False,
                "model": model,
                "content": str(content).strip(),
                "raw_model": data.get("model"),
            }
        except Exception as exc:  # noqa: BLE001 - record the failure and try the next model
            errors.append({"model": model, "exception": str(exc)[:500]})
    # Fallback 1: optional external Ollama endpoint, if configured.
    if allow_local_fallback and ollama_engine.enabled(settings):
        ollama_result = await ollama_engine.chat_completion(messages, http_client, settings)
        if ollama_result.get("ok"):
            return {
                **ollama_result,
                "provider_chat_ok": False,
                "ollama_ok": True,
                "provider_errors": errors,
                "warning": "HF provider chat failed; using external Ollama fallback.",
            }
        errors.append({"model": "ollama", "error": ollama_result})
    # Fallback 2: deterministic local receptionist rules, so calls never go silent.
    if allow_local_fallback:
        return {
            "ok": True,
            "source": "local_rules_fallback",
            "provider_chat_ok": False,
            "ollama_ok": False,
            "model": "local-receptionist-rules",
            "content": local_receptionist_reply(messages, settings.business_name),
            "provider_errors": errors,
            "warning": "HF provider chat failed; using local beta rules fallback.",
        }
    return {"ok": False, "errors": errors}


async def chat_text(
    messages: list[dict[str, Any]],
    http_client: httpx.AsyncClient,
    settings: Settings,
) -> str:
    result = await chat_completion(messages, http_client, settings)
    if result.get("ok"):
        return str(result["content"])
    raise RuntimeError(f"All chat models failed: {result.get('errors')}")
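

if __name__ == "__main__":
    # Minimal manual smoke test, not used by the service. It assumes Settings()
    # can be constructed as-is (e.g. from environment variables); the actual
    # constructor lives in config.py and may differ.
    import asyncio

    async def _demo() -> None:
        settings = Settings()  # assumption: default construction works locally
        async with httpx.AsyncClient() as client:
            reply = await chat_text(
                [{"role": "user", "content": "What are your opening hours?"}],
                client,
                settings,
            )
            print(reply)

    asyncio.run(_demo())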