# Base image: RunPod's PyTorch image (Python 3.11, CUDA 12.4.1 devel, Ubuntu 22.04).
# The tag itself ships PyTorch 2.4.0; a later step in this file upgrades it
# from the cu124 wheel index to PyTorch 2.6 stable, which supports
# sm_89 (RTX 4090) and sm_90 (H100).
# For RTX 5090 (sm_120 Blackwell), use a different base image or nightly build.
FROM runpod/pytorch:2.4.0-py3.11-cuda12.4.1-devel-ubuntu22.04

# Build-time default for the LLM selection; override with
#   docker build --build-arg CONF_LLM_MODEL=<name> .
ARG CONF_LLM_MODEL=mistral

# Runtime environment, grouped in a single instruction:
#   PYTHONUNBUFFERED        - Python writes stdout/stderr without buffering
#   PYTHONDONTWRITEBYTECODE - Python does not write .pyc files
#   CONF_LLM_MODEL          - carries the build argument above into the container
ENV PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1 \
    CONF_LLM_MODEL=${CONF_LLM_MODEL}

# All relative paths below resolve against /app.
WORKDIR /app

# System packages (one per line, alphabetical, for clean diffs).
# curl is what the HEALTHCHECK at the bottom of this file invokes; the rest
# are audio tooling/libraries. The apt package lists are removed in the same
# layer so they never end up in the image.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        curl \
        ffmpeg \
        libsndfile1 \
        libsox-dev \
        sox \
    && rm -rf /var/lib/apt/lists/*

# Bring the base image's PyTorch up to 2.6 stable (CUDA 12.4, supports sm_89/sm_90).
# No version specifier is given, so pip resolves to the newest release that
# the cu124 wheel index offers.
RUN pip install --no-cache-dir --upgrade \
    --index-url https://download.pytorch.org/whl/cu124 \
    torch \
    torchvision \
    torchaudio

# Python dependencies for the API service, one requirement per line
# (alphabetical) so additions and removals show up as single-line diffs.
RUN pip install --no-cache-dir \
    "fastapi>=0.115.0" \
    "faster-whisper>=1.1.0" \
    hf_transfer \
    httpx \
    "huggingface-hub>=0.26.0" \
    numpy \
    "pydantic-settings>=2.0" \
    python-multipart \
    soundfile \
    "uvicorn[standard]>=0.32.0" \
    websockets

# Install llama-cpp-python (with its [server] extra) using the prebuilt CUDA 12.4 wheels.
# --extra-index-url makes pip consider PyPI *and* the cu124 wheel index and pick
# the highest version across both. PyPI only publishes source distributions for
# this package, so a newer sdist there would win and be compiled locally without
# any CUDA build flags. --prefer-binary tells pip to choose a wheel even when a
# newer sdist exists, which keeps the install on the CUDA wheel.
RUN pip install --no-cache-dir --prefer-binary \
    --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cu124 \
    "llama-cpp-python[server]"

# Install faster-qwen3-tts (CUDA graphs, 5-6x faster than qwen-tts).
#
# qwen-tts 0.1.1 pins transformers==4.57.3 but its code uses 5.x symbols.
# transformers stays at 4.57.3 and the 3 missing symbols are backported in
# tts.py; upgrading transformers instead cascades into torch/torchvision/RoPE
# breakage.
#
# Step 1 installs the pinned transformers plus the supporting libraries.
# Steps 2 and 3 use --no-deps so pip installs only the two TTS packages
# themselves and does not resolve (or alter) their declared dependencies.
RUN pip install --no-cache-dir \
        "transformers==4.57.3" \
        "accelerate>=1.12.0" \
        librosa \
        einops \
        onnxruntime \
        sox \
    && pip install --no-cache-dir --no-deps "qwen-tts>=0.1.1" \
    && pip install --no-cache-dir --no-deps "faster-qwen3-tts>=0.2.1"

# Speaker verification for voice cloning (identifies target speaker).
# pyannote.audio pulls torchvision from PyPI (CPU-only), overriding the CUDA
# version, so torch/torchvision/torchaudio are re-installed from the cu124
# index right afterwards to fix the mismatch.
#
# The pip cache/temp cleanup runs inside this same RUN: files deleted in a
# separate, later layer are only hidden, not reclaimed, so a standalone
# "RUN rm -rf" adds a layer without shrinking the image.
RUN pip install --no-cache-dir pyannote.audio \
    && pip install --no-cache-dir --upgrade \
        --index-url https://download.pytorch.org/whl/cu124 \
        torch torchvision torchaudio \
    && rm -rf /root/.cache/pip /tmp/pip-*

# Copy application code.
# start.sh gets its executable mode at copy time via --chmod (requires BuildKit,
# the default builder in current Docker releases). A follow-up "RUN chmod"
# would store a second copy of the file in an extra layer.
COPY api/ /app/api/
COPY --chmod=755 start.sh /app/start.sh

# The service listens on port 8000 (documentation only; publish with -p at run time).
EXPOSE 8000

# Liveness probe against the /health endpoint.
# Models are downloaded on first boot, so the service may not answer for a
# while after start. --start-period gives it a grace window: failed probes
# during that window do not count toward --retries, and the first successful
# probe ends the window early. NOTE(review): 300s is an estimate - tune it to
# the observed first-boot download time.
# curl flags: -f fail on HTTP errors, -s no progress meter, -S still print errors.
HEALTHCHECK --interval=30s --timeout=10s --start-period=300s --retries=5 \
    CMD curl -fsS http://localhost:8000/health || exit 1

# Container entry command (exec form, so start.sh is run directly rather than
# through a wrapping shell).
# No model weights are fetched during the image build; models are downloaded
# on first boot (fast with hf_transfer).
CMD ["/app/start.sh"]