File size: 4,635 Bytes
64b6538
440898e
64b6538
 
4fed00b
 
440898e
 
 
 
 
a96588e
440898e
 
 
 
 
 
3b28115
 
 
 
a96588e
 
3b28115
440898e
4fed00b
 
 
440898e
 
 
 
 
 
 
 
 
 
 
4fed00b
440898e
 
 
 
 
 
 
 
 
 
4fed00b
 
64b6538
 
440898e
 
 
 
 
 
4fed00b
440898e
64b6538
 
 
440898e
a96588e
440898e
 
64b6538
 
 
4fed00b
64b6538
4fed00b
 
 
440898e
64b6538
 
4fed00b
64b6538
 
 
4fed00b
64b6538
 
 
4fed00b
64b6538
440898e
3b28115
440898e
 
 
4fed00b
 
a96588e
 
440898e
 
 
4fed00b
64b6538
4fed00b
 
 
 
 
4190d66
4fed00b
 
 
 
 
 
 
a96588e
440898e
 
 
 
4fed00b
440898e
 
 
4fed00b
a96588e
 
3b28115
a96588e
 
 
 
 
4fed00b
 
3b28115
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
# app.py — OLLAMA SERVER (HF SPACE) — V18 FINAL
"""
AKIRA V18 — OLLAMA SERVER
- Receives: prompt, numero, usuario, mensagem, mensagem_citada, historico
- Forwards the complete prompt to Ollama
- Replies with {"resposta": "..."}
"""
import subprocess
import time
import requests
import sys
import os
from flask import Flask, request, jsonify
from loguru import logger

app = Flask(__name__)
# Local Ollama daemon endpoint; the daemon itself is spawned by start_ollama().
OLLAMA_URL = "http://localhost:11434"

# === OLLAMA CONFIGURATION ===
# Set BEFORE the "ollama serve" subprocess is spawned so the child inherits
# them (subprocess.Popen passes os.environ to the child by default).
# Semantics per Ollama server env-var docs — verify against the deployed version.
os.environ["OLLAMA_NUM_PARALLEL"] = "3"        # parallel generation slots
os.environ["OLLAMA_MAX_QUEUE"] = "10"          # max queued requests before rejecting
os.environ["OLLAMA_KEEP_ALIVE"] = "10m"        # keep model resident for 10 minutes
os.environ["OLLAMA_MAX_LOADED_MODELS"] = "1"   # single model to bound memory use

# === START OLLAMA ===
def start_ollama():
    """Spawn the Ollama daemon in the background, discarding its output.

    Fire-and-forget: the Popen handle is not kept; readiness is checked
    separately via wait_for_ollama().
    """
    logger.info("Iniciando ollama serve...")
    sink = subprocess.DEVNULL
    subprocess.Popen(
        ["ollama", "serve"],
        stdout=sink,
        stderr=sink,
    )

def check_ollama():
    """Return True when the local Ollama daemon answers /api/tags with HTTP 200.

    Returns False while the daemon is still starting (connection refused)
    or when the probe times out.
    """
    try:
        resp = requests.get(f"{OLLAMA_URL}/api/tags", timeout=10)
    except requests.RequestException:
        # Narrowed from a bare "except:", which would also swallow
        # KeyboardInterrupt/SystemExit and mask programming errors.
        return False
    return resp.status_code == 200

def wait_for_ollama():
    """Poll the Ollama daemon until it responds, for up to ~2 minutes.

    Returns:
        True as soon as check_ollama() succeeds; False after 60 probes
        spaced 2 seconds apart without a response.
    """
    logger.info("Aguardando Ollama (11434)...")
    attempts = 0
    while attempts < 60:
        if check_ollama():
            logger.success("Ollama PRONTO! qwen2.5:3b-instruct-q4_0")
            return True
        time.sleep(2)
        attempts += 1
    logger.critical("Ollama não subiu!")
    return False

# === ROOT ===
@app.route("/")
def index():
    """Serve a minimal HTML landing page describing the API surface."""
    page = '''
    <div style="font-family: monospace; text-align: center; margin: 50px; background: #000; color: #0f0; padding: 40px;">
        <h1>AKIRA V18 OLLAMA SERVER</h1>
        <p><strong>qwen2.5:3b-instruct-q4_0</strong></p>
        <p><strong>Recebe: prompt, numero, mensagem, mensagem_citada, historico</strong></p>
        <p><code>POST /api/generate</code></p>
        <p>Health: <a href="/health">/health</a></p>
    </div>
    '''
    return page, 200

@app.route("/health")
def health():
    return jsonify({"status": "OK" if check_ollama() else "INICIANDO..."}), 200

# === MAIN ROUTE: /api/generate ===
@app.route("/api/generate", methods=['POST'])
def generate():
    """Forward a fully-built prompt to Ollama and return its reply.

    Expects a JSON body with: prompt (required), numero, usuario, mensagem,
    mensagem_citada, historico. Responds with {"resposta": "..."} on success,
    {"error": ...} with 400 when prompt is missing, 503 while the daemon is
    still starting, and 500 on Ollama failures.
    """
    if not check_ollama():
        return jsonify({'resposta': 'Epá, tô acordando... espera 10s!'}), 503

    # silent=True: malformed JSON or a wrong Content-Type yields None instead
    # of Flask aborting with its own 400 page, so the handler keeps control of
    # the error response format.
    data = request.get_json(silent=True) or {}

    def _text(key):
        # Tolerate absent or non-string JSON values (null, numbers) that would
        # otherwise raise AttributeError on .strip() and surface as a 500.
        value = data.get(key, '')
        return value.strip() if isinstance(value, str) else ''

    # === FIELDS SENT BY api.py ===
    prompt = _text('prompt')
    numero = data.get('numero', '')
    usuario = data.get('usuario', 'anonimo')
    mensagem = _text('mensagem')
    mensagem_citada = _text('mensagem_citada')
    historico = data.get('historico', [])
    if not isinstance(historico, list):
        # Keep len(historico) below safe when the client sends a scalar.
        historico = []

    if not prompt:
        return jsonify({'error': 'prompt obrigatório'}), 400

    # === FULL LOG ===
    logger.info(f"Recebido de {usuario} ({numero})")
    logger.info(f"Mensagem: {mensagem[:60]}")
    if mensagem_citada:
        logger.info(f"Reply: {mensagem_citada[:60]}")
    logger.info(f"Histórico: {len(historico)} mensagens")
    logger.info(f"Prompt: {prompt[:120]}...")

    # === SEND THE FULL PROMPT TO OLLAMA ===
    payload = {
        "model": "qwen2.5:3b-instruct-q4_0",
        "prompt": prompt,
        "stream": False,
        "options": {
            "temperature": 0.9,
            "num_predict": 256,   # response length cap
            "num_ctx": 2048,      # context window
            "num_thread": 2
        }
    }

    try:
        resp = requests.post(f"{OLLAMA_URL}/api/generate", json=payload, timeout=120)
        if resp.status_code == 200:
            resposta = resp.json().get("response", "").strip()
            if resposta:
                logger.success(f"Resposta: {resposta[:60]}...")
                return jsonify({'resposta': resposta})
            else:
                # Model produced an empty completion; ask the user to retry.
                return jsonify({'resposta': 'Epá, fiquei sem palavras... manda de novo!'})
        else:
            logger.warning(f"Ollama erro {resp.status_code}: {resp.text}")
            return jsonify({'resposta': 'Epá, tô com problema... tenta de novo!'}), 500
    except Exception as e:
        # Handler-level boundary: covers network errors and bad JSON from
        # Ollama; logged so failures remain visible.
        logger.error(f"Erro Ollama: {e}")
        return jsonify({'resposta': 'Epá, tô off... volta já!'}), 500

# === STARTUP ===
if __name__ == "__main__":
    # Replace loguru's default sink with a compact timestamped format.
    logger.remove()
    logger.add(sys.stderr, format="<green>{time:HH:mm:ss}</green> → <level>{message}</level>")
    logger.info("AKIRA V18 — OLLAMA SERVER INICIANDO...")
    start_ollama()
    if not wait_for_ollama():
        sys.exit(1)
    # Warm-up: a 1-token generation forces the model into memory so the first
    # real request doesn't pay the load latency. Best-effort only — failure
    # must not prevent the server from starting.
    logger.info("Warming up...")
    try:
        requests.post(f"{OLLAMA_URL}/api/generate", json={
            "model": "qwen2.5:3b-instruct-q4_0",
            "prompt": "Oi",
            "options": {"num_predict": 1}
        }, timeout=60)
        logger.success("Akira aquecida!")
    except requests.RequestException as e:
        # Narrowed from a bare "except: pass", which also swallowed
        # KeyboardInterrupt/SystemExit during startup; keep best-effort
        # semantics but make the failure visible.
        logger.warning(f"Warm-up falhou: {e}")
    logger.info("Flask na porta 7860")
    app.run(host="0.0.0.0", port=7860, debug=False)