import os
import re
import json
import logging
from datetime import datetime

from fastapi import FastAPI, HTTPException, Body
from pydantic import BaseModel, Field

# --- Self-hosted generative engine ---
import torch
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM

# --- Optional, for tool use ---
import httpx

# --- Basic configuration ---
logging.basicConfig(level=logging.INFO)

WEATHER_API_KEY = os.getenv("WEATHER_API_KEY", "")  # For the weather tool

# --- 🧠 LOAD THE LOCAL GENERATIVE AI MODEL ---
# Runs once at import time and loads the entire model into memory, so app
# startup blocks until it finishes.  The weights are downloaded automatically
# on first run.  CPU-only: expect very slow inference.
logging.info("Loading self-hosted generative model for CPU. This may take a moment...")
try:
    MODEL_NAME = "microsoft/Phi-3-mini-4k-instruct"
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    # Force the model to load on the CPU; no GPU/quantization configuration.
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME,
        device_map="cpu",
        trust_remote_code=True,
    )
    local_ai_pipeline = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
    )
    logging.info(f"✅ Model '{MODEL_NAME}' loaded on CPU. Expect very slow performance.")
except Exception as e:
    # Keep the module importable even when the model cannot be loaded;
    # the chat endpoint converts a missing pipeline into an HTTP 503.
    logging.error(f"❌ Failed to load local AI model: {e}")
    local_ai_pipeline = None

# --- AI Personas & System Prompt ---
ANIME_PERSONAS = {
    "default": "You are a versatile and intelligent AI assistant.",
    "sensei": "You are a wise anime sensei, exuding calm and profound wisdom.",
    "tsundere": "You are a fiery tsundere with a sharp tongue and a hidden soft side. You often say 'baka' or 'it's not like I like you or anything'.",
    "kawaii": "You are an adorable, bubbly kawaii anime girl who uses cute phrases like 'nya~' and 'kya~'.",
}


def get_system_prompt(persona: str, deep_think: bool) -> str:
    """Creates the system prompt based on selected persona and deep think mode."""
    persona_desc = ANIME_PERSONAS.get(persona, ANIME_PERSONAS["default"])
    today = datetime.now().strftime("%A, %B %d, %Y")
    deep_think_prompt = ""
    if deep_think:
        deep_think_prompt = (
            "\n**DEEP THINK MODE ACTIVATED:** You must provide a comprehensive, "
            "step-by-step, well-reasoned answer. Deconstruct the query, "
            "analyze it, and then synthesize a thorough response."
        )
    # FIX: no literal <|system|>/<|end|> markers here -- this text is passed as
    # the "system" message content and tokenizer.apply_chat_template adds the
    # role markers itself; embedding them again duplicates special tokens.
    # FIX: the prompt now names the available tools, so the model emits tool
    # names the dispatcher actually recognizes.
    return f"""{persona_desc}
Today is {today}. Your instructions are to be a helpful assistant.
Available tools: get_date() and get_weather(city='...').
If you need to use a tool to answer a question (like for current weather or the date), you must first state your intention in the format: [TOOL: function_name(args)]. For example: [TOOL: get_weather(city='Boksburg')].
After you state the tool, stop your response. You will then be given the tool's output to formulate your final answer.
{deep_think_prompt}
"""


# --- Tool Definitions ---
def get_date():
    """Returns the current date."""
    return {"date": datetime.now().strftime("%Y-%m-%d, %A")}


async def get_weather(city: str):
    """Gets the current weather for a specified city."""
    if not WEATHER_API_KEY:
        return {"error": "Weather API key is not configured."}
    url = "http://api.weatherapi.com/v1/current.json"
    try:
        async with httpx.AsyncClient() as client:
            # FIX: pass the city via params= so httpx URL-encodes it instead of
            # interpolating raw user-supplied text into the query string.
            res = await client.get(url, params={"key": WEATHER_API_KEY, "q": city})
            res.raise_for_status()
            data = res.json()
            return {
                "location": data["location"]["name"],
                "condition": data["current"]["condition"]["text"],
                "temperature_c": data["current"]["temp_c"],
            }
    except Exception as e:
        logging.error(f"Weather API error for {city}: {e}")
        return {"error": f"Failed to fetch weather for {city}."}


# Simple keyword-based tool dispatcher
async def execute_tool_if_needed(text: str):
    """Parses the AI's output to see if it wants to use a tool.

    Returns:
        (None, None)               -- no tool call found in *text*.
        (tool_name, result_dict)   -- a recognized tool was executed.
        (None, error_string) / (tool_name, error_dict) -- parse/dispatch error.
    """
    tool_match = re.search(r"\[TOOL:\s*(\w+)\((.*?)\)\s*\]", text)
    if not tool_match:
        return None, None  # No tool call found

    tool_name = tool_match.group(1)
    tool_args_str = tool_match.group(2)

    # Simple argument parsing for key='value' pairs
    args = {}
    if tool_args_str:
        try:
            # FIX: split('=', 1) so '=' characters inside a value don't make
            # dict() raise on a 3-element sequence.
            args = dict(arg.strip().split('=', 1) for arg in tool_args_str.split(','))
            args = {k: v.strip("'\"") for k, v in args.items()}
        except ValueError:
            logging.warning(f"Could not parse arguments for tool {tool_name}")
            return None, f"Error parsing arguments for {tool_name}"

    logging.info(f"Executing tool: {tool_name} with args: {args}")

    # FIX: the original dispatched on "get_current_date" and then called the
    # undefined get_current_date() -- a guaranteed NameError; the actual
    # function is get_date().  Accept both names for robustness against the
    # model's phrasing.
    if tool_name in ("get_date", "get_current_date"):
        return tool_name, get_date()
    elif tool_name == "get_weather":
        city = args.get('city')
        if city:
            return tool_name, await get_weather(city)
        else:
            return tool_name, {"error": "City was not specified."}

    return None, f"Tool '{tool_name}' not recognized."
# --- FastAPI App ---
app = FastAPI(title="NeuraSelf - Independent AI")


class ChatRequest(BaseModel):
    """Request payload for the /chat/ endpoint."""
    user_id: str = Field(..., example="user123")
    message: str = Field(..., example="What is the weather like in Boksburg?")
    persona: str = Field("default", example="tsundere")
    deep_think: bool = Field(False, example=True)


@app.post("/chat/")
async def chat(request: ChatRequest = Body(...)):
    """
    Main chat endpoint for the self-hosted NeuraSelf AI.

    Flow: build the persona system prompt, run one short low-temperature
    generation to detect a [TOOL: ...] request, execute the tool when one is
    found, then run a second generation that turns the tool result into a
    natural-language, in-character answer.

    Raises:
        HTTPException(503) when the local model failed to load at startup.
    """
    if not local_ai_pipeline:
        raise HTTPException(status_code=503, detail="Local AI model is not available or failed to load.")

    # 1. Create the system prompt
    system_prompt = get_system_prompt(request.persona, request.deep_think)

    # For this self-contained example, we'll use a simple in-memory history.
    # In a real app, you would use MongoDB as in your original file.
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": request.message},
    ]

    # 2. Generate the initial AI response to see if it needs a tool
    prompt = local_ai_pipeline.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

    generation_args = {
        "max_new_tokens": 100,  # Keep the first pass short
        "return_full_text": False,
        "temperature": 0.1,  # Low temperature to reliably generate the tool format
        "do_sample": True,
    }
    initial_output = local_ai_pipeline(prompt, **generation_args)[0]['generated_text']

    # 3. Check for and execute tools
    tool_name, tool_result = await execute_tool_if_needed(initial_output)

    # 4. Generate the final response
    if tool_name:
        logging.info(f"Tool '{tool_name}' returned: {tool_result}")
        # FIX: no literal <|user|>/<|end|> markers inside the message content.
        # apply_chat_template wraps the "user" role in those tokens itself, so
        # embedding them again produced doubled special tokens in the prompt.
        tool_message = (
            f"OK. Here is the result from the tool '{tool_name}': {json.dumps(tool_result)}. "
            "Now, please formulate a natural language response to the original question, in character."
        )
        messages.append({"role": "assistant", "content": initial_output})  # Add the AI's tool request
        messages.append({"role": "user", "content": tool_message})  # Add the tool result as a user message

        final_prompt = local_ai_pipeline.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

        final_generation_args = {
            "max_new_tokens": 500,
            "return_full_text": False,
            "temperature": 0.7,  # Higher temperature for a more natural final answer
            "do_sample": True,
        }
        final_output = local_ai_pipeline(final_prompt, **final_generation_args)[0]['generated_text']
        return {"response": final_output}
    else:
        # If no tool was needed, the initial response is the final one
        return {"response": initial_output}