# babelcast-mistral / Dockerfile
# Origin: marcosremar2 — "Upload folder using huggingface_hub" (commit 1e91d30, verified)
# Base: RunPod's PyTorch image (Python 3.11, CUDA 12.4).
# PyTorch 2.6 stable supports sm_89 (RTX 4090) and sm_90 (H100);
# an RTX 5090 (sm_120 Blackwell) needs a different base image or a nightly build.
FROM runpod/pytorch:2.4.0-py3.11-cuda12.4.1-devel-ubuntu22.04

# Which LLM backend to bake in; overridable with --build-arg CONF_LLM_MODEL=...
ARG CONF_LLM_MODEL=mistral

# Runtime environment: unbuffered stdout/stderr, no .pyc files, selected model.
ENV PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1 \
    CONF_LLM_MODEL=${CONF_LLM_MODEL}

WORKDIR /app
# OS-level packages: audio tooling (ffmpeg/sox/libsndfile) plus curl for the
# health probe. Lists are cleaned in the same layer so they never hit the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
        curl \
        ffmpeg \
        libsndfile1 \
        libsox-dev \
        sox \
    && rm -rf /var/lib/apt/lists/*
# Upgrade PyTorch to 2.6 stable (CUDA 12.4 wheels; supports sm_89/sm_90).
# Versions are pinned: the original unpinned `--upgrade torch` would pull
# whatever release is newest at build time, silently invalidating the
# sm_89/sm_90 support this image documents. 0.21.0 / 2.6.0 are the
# torchvision / torchaudio releases paired with torch 2.6.0.
RUN pip install --no-cache-dir --upgrade \
        "torch==2.6.0" "torchvision==0.21.0" "torchaudio==2.6.0" \
        --index-url https://download.pytorch.org/whl/cu124
# Application Python dependencies: ASR (faster-whisper), web stack
# (FastAPI + uvicorn + websockets), audio I/O, and fast HF model downloads.
RUN pip install --no-cache-dir \
        "fastapi>=0.115.0" \
        "faster-whisper>=1.1.0" \
        hf_transfer \
        httpx \
        "huggingface-hub>=0.26.0" \
        numpy \
        "pydantic-settings>=2.0" \
        python-multipart \
        soundfile \
        "uvicorn[standard]>=0.32.0" \
        websockets
# Install llama-cpp-python with CUDA support.
# Step 1 pulls the prebuilt CUDA 12.4 wheel from the upstream wheel index
# (PyPI's sdist would compile here without CUDA enabled).
# Step 2 adds the [server] extra; the base requirement is already satisfied
# by the installed wheel, so only the extra's dependencies are fetched.
RUN pip install --no-cache-dir llama-cpp-python \
--extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cu124 && \
pip install --no-cache-dir "llama-cpp-python[server]"
# Install faster-qwen3-tts (CUDA graphs, 5-6x faster than qwen-tts).
# qwen-tts 0.1.1 pins transformers==4.57.3 but its code uses 5.x symbols.
# We keep 4.57.3 and backport the 3 missing symbols in tts.py (simpler than
# upgrading transformers which cascades into torch/torchvision/RoPE breakage).
# Ordering matters: the shared dependencies are installed first, then both
# TTS packages go in with --no-deps so their own metadata cannot drag in a
# conflicting dependency set on top of what was just pinned.
RUN pip install --no-cache-dir "transformers==4.57.3" "accelerate>=1.12.0" \
librosa einops onnxruntime sox && \
pip install --no-cache-dir --no-deps "qwen-tts>=0.1.1" && \
pip install --no-cache-dir --no-deps "faster-qwen3-tts>=0.2.1"
# Speaker verification for voice cloning (identifies target speaker).
# pyannote.audio pulls torchvision from PyPI (CPU-only), overriding the CUDA
# build, so the cu124 wheels are re-installed in the same layer. Versions are
# pinned to the 2.6.0 / 0.21.0 wheel set this image targets (the original
# unpinned `--upgrade` could drift to a different torch release over time).
# `--upgrade` is still required: the CPU build has the same public version as
# the +cu124 build, so without it pip would consider the pin already satisfied.
RUN pip install --no-cache-dir pyannote.audio && \
    pip install --no-cache-dir --upgrade \
        "torch==2.6.0" "torchvision==0.21.0" "torchaudio==2.6.0" \
        --index-url https://download.pytorch.org/whl/cu124
# Cleanup.
# NOTE(review): every pip call above already uses --no-cache-dir, so this is a
# belt-and-braces sweep of leftover temp files. Because it runs in its own
# layer, it cannot shrink data already committed by earlier layers — it only
# keeps the final filesystem view tidy.
RUN rm -rf /root/.cache/pip /tmp/pip-*
# Copy application code.
COPY api/ /app/api/
# Set the execute bit at copy time (BuildKit --chmod) instead of a follow-up
# `RUN chmod +x`, which would duplicate start.sh into an extra layer.
COPY --chmod=755 start.sh /app/start.sh
EXPOSE 8000
# Health probe for orchestrators. Models are downloaded on first boot (fast
# with hf_transfer), so --start-period gives the service a grace window in
# which failing probes do not count against the retry budget — without it the
# container could be flagged unhealthy while still fetching weights.
HEALTHCHECK --interval=30s --timeout=10s --start-period=180s --retries=5 \
CMD curl -f http://localhost:8000/health || exit 1
# Models downloaded on first boot (fast with hf_transfer)
CMD ["/app/start.sh"]