# NOTE: Hugging Face Spaces status banner captured with the source —
# the deployed Space currently reports "Runtime error".
| from fastapi import FastAPI, Header, HTTPException | |
| from fastapi.middleware.cors import CORSMiddleware | |
| from pydantic import BaseModel | |
| import torch | |
| import os | |
| import re | |
| import secrets | |
| from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig | |
| from duckduckgo_search import DDGS | |
# FastAPI application with fully permissive CORS so browser clients on
# any origin may call the API (wildcard origins, methods, and headers).
app = FastAPI()

app.add_middleware(
    CORSMiddleware,
    allow_headers=["*"],
    allow_methods=["*"],
    allow_origins=["*"],
)
# --- Specialist DB ---
# In-memory API-key registry: per-key request quota (`limit`), usage
# counter (`used`), and activation flag (`status`).
# NOTE(review): this state is process-local and resets on every restart —
# persist it externally if quotas must survive redeploys.
API_KEYS_DB = {
    "ELE-PRIME-ADMIN-SYS": {"limit": 100000, "used": 0, "status": "active"},
    "ELE-PRIME-VOID-X": {"limit": 50000, "used": 0, "status": "active"},
}

# SECURITY FIX: prefer the ADMIN_SECRET environment variable so the
# credential is not baked into the source; the original literal remains
# only as a backward-compatible fallback — rotate it and drop the
# fallback as soon as the env var is configured.
ADMIN_SECRET = os.environ.get("ADMIN_SECRET", "MINZO-SECRET-2026")
# --- MiMo-Audio 7B Optimization ---
model_id = "XiaomiMiMo/MiMo-Audio-7B-Instruct"
print(f"🔱 INACHI-CORE: Deploying Multimodal Engine {model_id}...")

# Load the checkpoint with 4-bit quantization (saves CPU/RAM), computing
# in float16.
quant_config = BitsAndBytesConfig(
    bnb_4bit_compute_dtype=torch.float16,
    load_in_4bit=True,
)

tokenizer = AutoTokenizer.from_pretrained(model_id)

# device_map="auto" lets the loader place weights and manage RAM
# automatically.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=quant_config,
    device_map="auto",
)
class AdminRequest(BaseModel):
    """Payload for admin operations: the admin password plus the request
    limit to assign (defaults to 5000)."""

    admin_pass: str
    limit: int = 5000
@app.post("/chat")  # FIX(review): no route decorator was present, so this
# handler was never registered with FastAPI — confirm the intended path.
async def chat(message: dict, x_api_key: str = Header(None)):
    """Chat endpoint: validate the API key, enforce its quota, run the
    quantized MiMo model on the user's query, and return the reply with
    the key's updated usage count.

    Args:
        message: JSON body; the user prompt is read from ``message["query"]``
            (empty string when absent).
        x_api_key: API key supplied via the ``X-API-Key`` header.

    Raises:
        HTTPException: 403 for a missing/unknown/inactive key,
            429 when the key's request limit is exhausted.
    """
    if not x_api_key or x_api_key not in API_KEYS_DB:
        raise HTTPException(status_code=403, detail="Invalid Key")

    key_info = API_KEYS_DB[x_api_key]
    # FIX: the registry tracks `status` and `limit`, but the original
    # handler never enforced either — do so here.
    if key_info["status"] != "active":
        raise HTTPException(status_code=403, detail="Invalid Key")
    if key_info["used"] >= key_info["limit"]:
        raise HTTPException(status_code=429, detail="Usage limit exceeded")

    query = message.get("query", "")

    # System prompt for the MiMo pipeline.
    system_instruction = (
        "You are Inachi-Prime, an Any-to-Any multimodal AI assistant. "
        "You are designed by Specialist MINZO-PRIME. "
        "Respond accurately in Sinhala or English based on user input."
    )

    # BUGFIX: move inputs to the device the model was actually placed on
    # (device_map="auto") instead of hard-coding "cpu", which breaks when
    # any layer is offloaded to a GPU.
    inputs = tokenizer(
        f"{system_instruction}\n\nUser: {query}\nAssistant:",
        return_tensors="pt",
    ).to(model.device)

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=512,
            temperature=0.7,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
        )

    # Keep only the text after the final "Assistant:" marker.
    ans = tokenizer.decode(outputs[0], skip_special_tokens=True).split("Assistant:")[-1].strip()
    key_info["used"] += 1
    return {"reply": ans, "usage": key_info["used"]}
# Expose the ASGI application under the name "main" as well (entry-point
# alias expected by the hosting environment).
main = app