# app.py — OLLAMA SERVER (HF SPACE) — V18 FINAL
"""
AKIRA V18 — OLLAMA SERVER
- Recebe: prompt, numero, usuario, mensagem, mensagem_citada, historico
- Envia prompt completo para Ollama
- Responde com {"resposta": "..."}
"""
import subprocess
import time
import requests
import sys
import os
from flask import Flask, request, jsonify
from loguru import logger
app = Flask(__name__)
OLLAMA_URL = "http://localhost:11434"
# === CONFIGURAÇÃO OLLAMA ===
os.environ["OLLAMA_NUM_PARALLEL"] = "3"
os.environ["OLLAMA_MAX_QUEUE"] = "10"
os.environ["OLLAMA_KEEP_ALIVE"] = "10m"
os.environ["OLLAMA_MAX_LOADED_MODELS"] = "1"
# === INICIA OLLAMA ===
def start_ollama():
logger.info("Iniciando ollama serve...")
subprocess.Popen(["ollama", "serve"], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
def check_ollama():
try:
resp = requests.get(f"{OLLAMA_URL}/api/tags", timeout=10)
return resp.status_code == 200
except:
return False
def wait_for_ollama():
logger.info("Aguardando Ollama (11434)...")
for i in range(60):
if check_ollama():
logger.success("Ollama PRONTO! qwen2.5:3b-instruct-q4_0")
return True
time.sleep(2)
logger.critical("Ollama não subiu!")
return False
# === RAIZ ===
@app.route("/")
def index():
return '''
<div style="font-family: monospace; text-align: center; margin: 50px; background: #000; color: #0f0; padding: 40px;">
<h1>AKIRA V18 OLLAMA SERVER</h1>
<p><strong>qwen2.5:3b-instruct-q4_0</strong></p>
<p><strong>Recebe: prompt, numero, mensagem, mensagem_citada, historico</strong></p>
<p><code>POST /api/generate</code></p>
<p>Health: <a href="/health">/health</a></p>
</div>
''', 200
@app.route("/health")
def health():
return jsonify({"status": "OK" if check_ollama() else "INICIANDO..."}), 200
# === ROTA PRINCIPAL: /api/generate ===
@app.route("/api/generate", methods=['POST'])
def generate():
if not check_ollama():
return jsonify({'resposta': 'Epá, tô acordando... espera 10s!'}), 503
data = request.get_json() or {}
# === RECEBE TUDO QUE O api.py ENVIA ===
prompt = data.get('prompt', '').strip()
numero = data.get('numero', '')
usuario = data.get('usuario', 'anonimo')
mensagem = data.get('mensagem', '').strip()
mensagem_citada = data.get('mensagem_citada', '').strip()
historico = data.get('historico', [])
if not prompt:
return jsonify({'error': 'prompt obrigatório'}), 400
# === LOG COMPLETO ===
logger.info(f"Recebido de {usuario} ({numero})")
logger.info(f"Mensagem: {mensagem[:60]}")
if mensagem_citada:
logger.info(f"Reply: {mensagem_citada[:60]}")
logger.info(f"Histórico: {len(historico)} mensagens")
logger.info(f"Prompt: {prompt[:120]}...")
# === ENVIA PROMPT GIGANTE PARA OLLAMA ===
payload = {
"model": "qwen2.5:3b-instruct-q4_0",
"prompt": prompt,
"stream": False,
"options": {
"temperature": 0.9,
"num_predict": 256,
"num_ctx": 2048,
"num_thread": 2
}
}
try:
resp = requests.post(f"{OLLAMA_URL}/api/generate", json=payload, timeout=120)
if resp.status_code == 200:
resposta = resp.json().get("response", "").strip()
if resposta:
logger.success(f"Resposta: {resposta[:60]}...")
return jsonify({'resposta': resposta})
else:
return jsonify({'resposta': 'Epá, fiquei sem palavras... manda de novo!'})
else:
logger.warning(f"Ollama erro {resp.status_code}: {resp.text}")
return jsonify({'resposta': 'Epá, tô com problema... tenta de novo!'}), 500
except Exception as e:
logger.error(f"Erro Ollama: {e}")
return jsonify({'resposta': 'Epá, tô off... volta já!'}), 500
# === INÍCIO ===
if __name__ == "__main__":
logger.remove()
logger.add(sys.stderr, format="<green>{time:HH:mm:ss}</green> → <level>{message}</level>")
logger.info("AKIRA V18 — OLLAMA SERVER INICIANDO...")
start_ollama()
if not wait_for_ollama():
sys.exit(1)
logger.info("Warming up...")
try:
requests.post(f"{OLLAMA_URL}/api/generate", json={
"model": "qwen2.5:3b-instruct-q4_0",
"prompt": "Oi",
"options": {"num_predict": 1}
}, timeout=60)
logger.success("Akira aquecida!")
except:
pass
logger.info("Flask na porta 7860")
app.run(host="0.0.0.0", port=7860, debug=False) |