import os
import re
import json
import logging
from datetime import datetime

from fastapi import FastAPI, HTTPException, Body
from pydantic import BaseModel, Field

# --- Self-hosted generative engine ---
import torch
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM

# --- Optional, for tool use ---
import httpx

# --- Basic configuration ---
logging.basicConfig(level=logging.INFO)

WEATHER_API_KEY = os.getenv("WEATHER_API_KEY", "")  # For the weather tool

# --- 🧠 LOAD THE LOCAL GENERATIVE AI MODEL ---
# Runs once at import time and loads the entire model into memory, so app
# startup blocks until it finishes.  The weights are downloaded automatically
# on first run.  CPU-only: expect very slow inference.
logging.info("Loading self-hosted generative model for CPU. This may take a moment...")
try:
    MODEL_NAME = "microsoft/Phi-3-mini-4k-instruct"
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    # Force the model to load on the CPU; no GPU/quantization configuration.
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME,
        device_map="cpu",
        trust_remote_code=True,
    )
    local_ai_pipeline = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
    )
    logging.info(f"✅ Model '{MODEL_NAME}' loaded on CPU. Expect very slow performance.")
except Exception as e:
    # Keep the module importable even when the model cannot be loaded;
    # the chat endpoint converts a missing pipeline into an HTTP 503.
    logging.error(f"❌ Failed to load local AI model: {e}")
    local_ai_pipeline = None

# --- AI Personas & System Prompt ---
ANIME_PERSONAS = {
    "default": "You are a versatile and intelligent AI assistant.",
    "sensei": "You are a wise anime sensei, exuding calm and profound wisdom.",
    "tsundere": "You are a fiery tsundere with a sharp tongue and a hidden soft side. You often say 'baka' or 'it's not like I like you or anything'.",
    "kawaii": "You are an adorable, bubbly kawaii anime girl who uses cute phrases like 'nya~' and 'kya~'.",
}


def get_system_prompt(persona: str, deep_think: bool) -> str:
    """Creates the system prompt based on selected persona and deep think mode."""
    persona_desc = ANIME_PERSONAS.get(persona, ANIME_PERSONAS["default"])
    today = datetime.now().strftime("%A, %B %d, %Y")
    deep_think_prompt = ""
    if deep_think:
        deep_think_prompt = (
            "\n**DEEP THINK MODE ACTIVATED:** You must provide a comprehensive, "
            "step-by-step, well-reasoned answer. Deconstruct the query, "
            "analyze it, and then synthesize a thorough response."
        )
    # FIX: no literal <|system|>/<|end|> markers here -- this text is passed as
    # the "system" message content and tokenizer.apply_chat_template adds the
    # role markers itself; embedding them again duplicates special tokens.
    # FIX: the prompt now names the available tools, so the model emits tool
    # names the dispatcher actually recognizes.
    return f"""{persona_desc}
Today is {today}. Your instructions are to be a helpful assistant.
Available tools: get_date() and get_weather(city='...').
If you need to use a tool to answer a question (like for current weather or the date), you must first state your intention in the format: [TOOL: function_name(args)]. For example: [TOOL: get_weather(city='Boksburg')].
After you state the tool, stop your response. You will then be given the tool's output to formulate your final answer.
{deep_think_prompt}
"""


# --- Tool Definitions ---
def get_date():
    """Returns the current date."""
    return {"date": datetime.now().strftime("%Y-%m-%d, %A")}


async def get_weather(city: str):
    """Gets the current weather for a specified city."""
    if not WEATHER_API_KEY:
        return {"error": "Weather API key is not configured."}
    url = "http://api.weatherapi.com/v1/current.json"
    try:
        async with httpx.AsyncClient() as client:
            # FIX: pass the city via params= so httpx URL-encodes it instead of
            # interpolating raw user-supplied text into the query string.
            res = await client.get(url, params={"key": WEATHER_API_KEY, "q": city})
            res.raise_for_status()
            data = res.json()
            return {
                "location": data["location"]["name"],
                "condition": data["current"]["condition"]["text"],
                "temperature_c": data["current"]["temp_c"],
            }
    except Exception as e:
        logging.error(f"Weather API error for {city}: {e}")
        return {"error": f"Failed to fetch weather for {city}."}


# Simple keyword-based tool dispatcher
async def execute_tool_if_needed(text: str):
    """Parses the AI's output to see if it wants to use a tool.

    Returns:
        (None, None)               -- no tool call found in *text*.
        (tool_name, result_dict)   -- a recognized tool was executed.
        (None, error_string) / (tool_name, error_dict) -- parse/dispatch error.
    """
    tool_match = re.search(r"\[TOOL:\s*(\w+)\((.*?)\)\s*\]", text)
    if not tool_match:
        return None, None  # No tool call found

    tool_name = tool_match.group(1)
    tool_args_str = tool_match.group(2)

    # Simple argument parsing for key='value' pairs
    args = {}
    if tool_args_str:
        try:
            # FIX: split('=', 1) so '=' characters inside a value don't make
            # dict() raise on a 3-element sequence.
            args = dict(arg.strip().split('=', 1) for arg in tool_args_str.split(','))
            args = {k: v.strip("'\"") for k, v in args.items()}
        except ValueError:
            logging.warning(f"Could not parse arguments for tool {tool_name}")
            return None, f"Error parsing arguments for {tool_name}"

    logging.info(f"Executing tool: {tool_name} with args: {args}")

    # FIX: the original dispatched on "get_current_date" and then called the
    # undefined get_current_date() -- a guaranteed NameError; the actual
    # function is get_date().  Accept both names for robustness against the
    # model's phrasing.
    if tool_name in ("get_date", "get_current_date"):
        return tool_name, get_date()
    elif tool_name == "get_weather":
        city = args.get('city')
        if city:
            return tool_name, await get_weather(city)
        else:
            return tool_name, {"error": "City was not specified."}

    return None, f"Tool '{tool_name}' not recognized."
# --- FastAPI App ---
app = FastAPI(title="NeuraSelf - Independent AI")


class ChatRequest(BaseModel):
    """Request payload for the /chat/ endpoint."""
    user_id: str = Field(..., example="user123")
    message: str = Field(..., example="What is the weather like in Boksburg?")
    persona: str = Field("default", example="tsundere")
    deep_think: bool = Field(False, example=True)


@app.post("/chat/")
async def chat(request: ChatRequest = Body(...)):
    """
    Main chat endpoint for the self-hosted NeuraSelf AI.

    Flow: build the persona system prompt, run one short low-temperature
    generation to detect a [TOOL: ...] request, execute the tool when one is
    found, then run a second generation that turns the tool result into a
    natural-language, in-character answer.

    Raises:
        HTTPException(503) when the local model failed to load at startup.
    """
    if not local_ai_pipeline:
        raise HTTPException(status_code=503, detail="Local AI model is not available or failed to load.")

    # 1. Create the system prompt
    system_prompt = get_system_prompt(request.persona, request.deep_think)

    # For this self-contained example, we'll use a simple in-memory history.
    # In a real app, you would use MongoDB as in your original file.
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": request.message},
    ]

    # 2. Generate the initial AI response to see if it needs a tool
    prompt = local_ai_pipeline.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

    generation_args = {
        "max_new_tokens": 100,  # Keep the first pass short
        "return_full_text": False,
        "temperature": 0.1,  # Low temperature to reliably generate the tool format
        "do_sample": True,
    }
    initial_output = local_ai_pipeline(prompt, **generation_args)[0]['generated_text']

    # 3. Check for and execute tools
    tool_name, tool_result = await execute_tool_if_needed(initial_output)

    # 4. Generate the final response
    if tool_name:
        logging.info(f"Tool '{tool_name}' returned: {tool_result}")
        # FIX: no literal <|user|>/<|end|> markers inside the message content.
        # apply_chat_template wraps the "user" role in those tokens itself, so
        # embedding them again produced doubled special tokens in the prompt.
        tool_message = (
            f"OK. Here is the result from the tool '{tool_name}': {json.dumps(tool_result)}. "
            "Now, please formulate a natural language response to the original question, in character."
        )
        messages.append({"role": "assistant", "content": initial_output})  # Add the AI's tool request
        messages.append({"role": "user", "content": tool_message})  # Add the tool result as a user message

        final_prompt = local_ai_pipeline.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

        final_generation_args = {
            "max_new_tokens": 500,
            "return_full_text": False,
            "temperature": 0.7,  # Higher temperature for a more natural final answer
            "do_sample": True,
        }
        final_output = local_ai_pipeline(final_prompt, **final_generation_args)[0]['generated_text']
        return {"response": final_output}
    else:
        # If no tool was needed, the initial response is the final one
        return {"response": initial_output}