File size: 1,018 Bytes
daa7341
05f67c9
cecf722
05f67c9
 
 
daa7341
 
395527b
cecf722
05f67c9
 
cecf722
05f67c9
cecf722
 
05f67c9
 
 
 
 
cecf722
 
05f67c9
3501991
 
05f67c9
daa7341
05f67c9
395527b
daa7341
 
395527b
05f67c9
ac0321c
cecf722
395527b
cecf722
daa7341
 
 
 
 
 
395527b
cecf722
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
# Dockerfile
# Base image: the slim Python 3.11 variant. Later steps rely on apt-get and
# pip being available in it.
FROM python:3.11-slim

# System packages: CA certificates and curl. curl is used to download the
# Ollama installer and by the HEALTHCHECK probe. The apt package lists are
# removed in the same layer so they do not end up in the image.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        ca-certificates \
        curl \
    && rm -rf /var/lib/apt/lists/*

# Download and install Ollama.
# The installer is saved to a file and then executed, instead of being piped
# straight into `sh`: with a pipe under the default /bin/sh the step's exit
# status is that of `sh` alone, so a failed or truncated download could go
# unnoticed and the build would carry on without a working Ollama install.
RUN curl -fsSL https://ollama.com/install.sh -o /tmp/ollama-install.sh \
    && sh /tmp/ollama-install.sh \
    && rm -f /tmp/ollama-install.sh

# Application directory: created if missing, and used as the working
# directory for the following COPY/RUN/CMD instructions.
WORKDIR /app

# Python dependencies.
# Only the manifest is copied before `pip install`, so the dependency layer
# stays cached until requirements.txt itself changes. Edits to app.py no
# longer trigger a full reinstall.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Application code, copied last because it changes most often.
COPY app.py .

# Documented ports: 7860 is the port probed by the HEALTHCHECK (the app's
# HTTP endpoint); 11434 is Ollama's default API port.
EXPOSE 7860 11434

# === Pull the small quantized model at build time ===
# A temporary Ollama server is started in the background, polled until it
# answers (up to ~60 s) rather than relying on a fixed sleep, and then used to
# pull the model. The server is always stopped afterwards, and the step exits
# with the pull's status so that a failed download fails the build instead of
# silently producing an image without the model.
RUN ollama serve & \
    OLLAMA_PID=$!; \
    ready=0; \
    for attempt in $(seq 1 60); do \
        if ollama list >/dev/null 2>&1; then ready=1; break; fi; \
        sleep 1; \
    done; \
    status=1; \
    if [ "$ready" -eq 1 ]; then \
        ollama pull qwen2.5:3b-q4_0; \
        status=$?; \
    fi; \
    kill "$OLLAMA_PID" 2>/dev/null || true; \
    wait "$OLLAMA_PID" 2>/dev/null || true; \
    exit "$status"

# Health probe: after a 60 s start period, request /health on port 7860 every
# 30 s with a 10 s timeout. Three consecutive failures mark the container
# unhealthy.
HEALTHCHECK --start-period=60s --interval=30s --timeout=10s --retries=3 \
    CMD curl -f http://localhost:7860/health || exit 1

# Ollama runtime tuning, read by the Ollama server from the environment:
# parallel requests, request queue length, how long a model stays loaded,
# and how many models may be loaded at once.
ENV OLLAMA_NUM_PARALLEL=3 \
    OLLAMA_MAX_QUEUE=10 \
    OLLAMA_KEEP_ALIVE=10m \
    OLLAMA_MAX_LOADED_MODELS=1

# Start the application (exec form, so python runs as the container's main
# process).
# NOTE(review): nothing in this Dockerfile starts `ollama serve` at runtime;
# presumably app.py launches it itself — confirm.
CMD ["python", "app.py"]