Inachi-ai-2 / app.py
MINZO4546's picture
Update app.py
c38e36c verified
from fastapi import FastAPI, Header, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
import torch
import os
import re
import secrets
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from duckduckgo_search import DDGS
# Application object that the route decorators in this module attach to.
app = FastAPI()

# Cross-origin policy: every origin, HTTP method and request header is
# accepted ("*" for all three settings).
_cors_policy = {
    "allow_origins": ["*"],
    "allow_methods": ["*"],
    "allow_headers": ["*"],
}
app.add_middleware(CORSMiddleware, **_cors_policy)
# --- Specialist DB ---
# In-memory registry of API keys, indexed by the key string. Each record has:
#   "limit"  - presumably the number of requests the key may make (confirm)
#   "used"   - request counter, incremented by the /v1/chat handler
#   "status" - lifecycle flag; both built-in keys start as "active"
# The registry lives only in process memory, so counters reset on restart.
# NOTE(review): these keys are committed in source; anyone who can read the
# file can use them. Consider loading them from configuration and rotating.
API_KEYS_DB = {
    "ELE-PRIME-ADMIN-SYS": {"limit": 100000, "used": 0, "status": "active"},
    "ELE-PRIME-VOID-X": {"limit": 50000, "used": 0, "status": "active"},
}

# Admin password. Read from the ADMIN_SECRET environment variable so a
# deployment can supply its own value without editing the source; the
# original literal remains the fallback (also used when the variable is set
# but empty) so existing setups keep working.
# NOTE(review): the fallback is public in source - set ADMIN_SECRET in
# production.
ADMIN_SECRET = os.environ.get("ADMIN_SECRET") or "MINZO-SECRET-2026"
# --- MiMo-Audio 7B Optimization ---
# Checkpoint identifier passed to both the tokenizer and the model loader.
model_id = "XiaomiMiMo/MiMo-Audio-7B-Instruct"
print(f"🔱 INACHI-CORE: Deploying Multimodal Engine {model_id}...")

# Enable 4-bit quantization with float16 as the compute dtype. The original
# author's note says this is meant to save CPU/RAM.
# NOTE(review): whether this quantization backend runs without a GPU is not
# shown here - confirm on the target hardware.
quant_config = BitsAndBytesConfig(
    bnb_4bit_compute_dtype=torch.float16,
    load_in_4bit=True,
)

tokenizer = AutoTokenizer.from_pretrained(model_id)

# device_map="auto": per the original author's note, this lets the library
# manage memory placement automatically.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    quantization_config=quant_config,
)
# Request body schema for an admin operation. No handler in the visible part
# of this file consumes it.
# (Kept as comments rather than a docstring so the generated schema for the
# model is unchanged.)
class AdminRequest(BaseModel):
    # Password supplied by the caller; presumably checked against
    # ADMIN_SECRET - confirm against the handler that uses this model.
    admin_pass: str

    # Optional; 5000 when omitted. Presumably the request limit for a key -
    # TODO confirm.
    limit: int = 5000
@app.post("/v1/chat")
async def chat(message: dict, x_api_key: str = Header(None)):
    """Generate a model reply to ``message["query"]`` for an authorised key.

    Args:
        message: JSON request body; the prompt text is read from its
            ``"query"`` entry.
        x_api_key: API key taken from the request headers; it must be a key
            of ``API_KEYS_DB``.

    Returns:
        A dict with ``"reply"`` (the decoded text following the last
        ``"Assistant:"`` marker, stripped) and ``"usage"`` (the key's request
        counter after this call).

    Raises:
        HTTPException: 403 if the key is missing, unknown or not active;
            429 if the key has reached its limit; 400 if ``query`` is
            missing, blank or not a string.
    """
    key_record = API_KEYS_DB.get(x_api_key) if x_api_key else None
    if key_record is None:
        raise HTTPException(status_code=403, detail="Invalid Key")

    # The registry stores a status and a limit per key; enforce them here so
    # deactivating a key or exhausting its quota actually blocks requests.
    if key_record.get("status", "active") != "active":
        raise HTTPException(status_code=403, detail="Inactive Key")
    limit = key_record.get("limit")
    if limit is not None and key_record["used"] >= limit:
        raise HTTPException(status_code=429, detail="Quota exceeded")

    # Reject unusable prompts before spending a full generation on them.
    query = message.get("query", "")
    if not isinstance(query, str) or not query.strip():
        raise HTTPException(status_code=400, detail="Missing query")

    # System prompt for the MiMo model.
    system_instruction = (
        "You are Inachi-Prime, an Any-to-Any multimodal AI assistant. "
        "You are designed by Specialist MINZO-PRIME. "
        "Respond accurately in Sinhala or English based on user input."
    )
    prompt = f"{system_instruction}\n\nUser: {query}\nAssistant:"

    # The model is loaded with device_map="auto", so it is not guaranteed to
    # sit on the CPU; send the inputs to wherever the model actually is to
    # avoid a device-mismatch error inside generate().
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    # NOTE(review): generate() is a blocking call inside an async handler, so
    # other requests on the same event loop wait while it runs.
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=512,
            temperature=0.7,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
        )

    # The decoded text is split on the "Assistant:" marker that ends the
    # prompt; keep only what follows its last occurrence.
    decoded = tokenizer.decode(outputs[0], skip_special_tokens=True)
    ans = decoded.split("Assistant:")[-1].strip()

    # Count the request only after a reply was produced.
    key_record["used"] += 1
    return {"reply": ans, "usage": key_record["used"]}
# Second module-level name bound to the same FastAPI instance as `app`.
# NOTE(review): presumably the hosting entry point looks up `main` - confirm.
main = app