"""FastAPI service exposing a single chat endpoint backed by a causal LM.

The module builds the FastAPI app, keeps an in-memory API-key table, loads
the tokenizer and a 4-bit quantized model at import time, and serves
``POST /v1/chat``.
"""

import os
import re
import secrets

import torch
from duckduckgo_search import DDGS
from fastapi import FastAPI, Header, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

app = FastAPI()
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)

# --- Specialist DB ---
# In-memory key table: usage counters reset whenever the process restarts.
# NOTE(review): API keys and the admin secret are hardcoded in source;
# consider loading them from the environment or a secret store.
API_KEYS_DB = {
    "ELE-PRIME-ADMIN-SYS": {"limit": 100000, "used": 0, "status": "active"},
    "ELE-PRIME-VOID-X": {"limit": 50000, "used": 0, "status": "active"},
}
ADMIN_SECRET = "MINZO-SECRET-2026"

# --- MiMo-Audio 7B Optimization ---
model_id = "XiaomiMiMo/MiMo-Audio-7B-Instruct"
print(f"🔱 INACHI-CORE: Deploying Multimodal Engine {model_id}...")

# Add 4-bit quantization to reduce the memory footprint of the weights.
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
)

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=quant_config,
    device_map="auto",  # let the loader decide where the weights are placed
)


class AdminRequest(BaseModel):
    """Request body carrying an admin password and a usage limit.

    No endpoint in the visible part of this file consumes this model.
    """

    admin_pass: str
    limit: int = 5000


@app.post("/v1/chat")
async def chat(message: dict, x_api_key: str = Header(None)):
    """Generate a reply to ``message["query"]`` for an authorised API key.

    Args:
        message: JSON request body; the user text is read from its
            ``"query"`` entry.
        x_api_key: API key taken from the ``X-API-Key`` request header.

    Returns:
        A dict with ``"reply"`` (the generated text) and ``"usage"`` (the
        number of requests counted against the key, including this one).

    Raises:
        HTTPException: 403 if the key is missing, unknown, or not active;
            429 if the key has used up its limit; 400 if ``query`` is not a
            non-empty string.
    """
    key_record = API_KEYS_DB.get(x_api_key) if x_api_key else None
    if key_record is None:
        raise HTTPException(status_code=403, detail="Invalid Key")
    # Enforce the status and limit fields stored alongside each key.
    if key_record.get("status") != "active":
        raise HTTPException(status_code=403, detail="Inactive Key")
    if key_record["used"] >= key_record["limit"]:
        raise HTTPException(status_code=429, detail="Usage limit exceeded")

    query = message.get("query", "")
    if not isinstance(query, str) or not query.strip():
        raise HTTPException(
            status_code=400,
            detail="Field 'query' must be a non-empty string",
        )

    # System prompt for the MiMo assistant.
    system_instruction = (
        "You are Inachi-Prime, an Any-to-Any multimodal AI assistant. "
        "You are designed by Specialist MINZO-PRIME. "
        "Respond accurately in Sinhala or English based on user input."
    )
    prompt = f"{system_instruction}\n\nUser: {query}\nAssistant:"

    # device_map="auto" may place the model on an accelerator, so send the
    # inputs to the model's device instead of hardcoding "cpu".
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    prompt_len = inputs["input_ids"].shape[-1]

    # NOTE(review): generate() is a blocking call inside an async handler,
    # so the event loop is stalled for the duration of each generation.
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=512,
            temperature=0.7,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
        )

    # Decode only the newly generated tokens. Splitting the full text on
    # "Assistant:" breaks when the query or the reply contains that marker.
    new_tokens = outputs[0][prompt_len:]
    ans = tokenizer.decode(new_tokens, skip_special_tokens=True).strip()

    key_record["used"] += 1
    return {"reply": ans, "usage": key_record["used"]}


main = app