Spaces:
Runtime error
Runtime error
| # Base image: RunPod's PyTorch devel image (Python 3.11, CUDA 12.4.1, Ubuntu 22.04). | |
| # The tag ships PyTorch 2.4.0; a later pip step upgrades it from the cu124 wheel index. | |
| # PyTorch 2.6 stable supports sm_89 (RTX 4090) and sm_90 (H100). | |
| # For RTX 5090 (sm_120 Blackwell), use a different base image or a nightly build. | |
| FROM runpod/pytorch:2.4.0-py3.11-cuda12.4.1-devel-ubuntu22.04 | |
| # Build-time selection of the LLM; override with --build-arg CONF_LLM_MODEL=<name>. | |
| ARG CONF_LLM_MODEL=mistral | |
| # Runtime environment: unbuffered Python output, no .pyc files, and the | |
| # build-time model choice re-exported so it is visible inside the container. | |
| ENV PYTHONUNBUFFERED=1 \ | |
| PYTHONDONTWRITEBYTECODE=1 \ | |
| CONF_LLM_MODEL=${CONF_LLM_MODEL} | |
| # All following relative paths and the container's start directory are /app. | |
| WORKDIR /app | |
| # OS packages: audio tooling/libraries (ffmpeg, libsndfile1, sox, libsox-dev) | |
| # plus curl, which the HEALTHCHECK at the end of this file invokes. | |
| # The apt package lists are removed in the same layer to keep it small. | |
| RUN apt-get update \ | |
| && apt-get install -y --no-install-recommends \ | |
| curl \ | |
| ffmpeg \ | |
| libsndfile1 \ | |
| libsox-dev \ | |
| sox \ | |
| && rm -rf /var/lib/apt/lists/* | |
| # Upgrade PyTorch to 2.6.0 stable (CUDA 12.4 wheels, supports sm_89/sm_90). | |
| # The three packages are pinned as a matched release set so the build is | |
| # reproducible and always installs the version this comment promises, | |
| # instead of "whatever is newest on the index at build time". | |
| RUN pip install --no-cache-dir --upgrade \ | |
| torch==2.6.0 torchvision==0.21.0 torchaudio==2.6.0 \ | |
| --index-url https://download.pytorch.org/whl/cu124 | |
| # Python packages for the API service: speech-to-text (faster-whisper), | |
| # the web stack (fastapi, uvicorn, python-multipart, websockets, httpx), | |
| # audio/array helpers (soundfile, numpy), Hugging Face download tooling | |
| # (huggingface-hub, hf_transfer) and settings handling (pydantic-settings). | |
| RUN pip install --no-cache-dir \ | |
| "faster-whisper>=1.1.0" \ | |
| "fastapi>=0.115.0" \ | |
| "uvicorn[standard]>=0.32.0" \ | |
| python-multipart \ | |
| httpx \ | |
| soundfile \ | |
| numpy \ | |
| "huggingface-hub>=0.26.0" \ | |
| hf_transfer \ | |
| "pydantic-settings>=2.0" \ | |
| websockets | |
| # Install llama-cpp-python (including the [server] extra) with CUDA support, | |
| # using the prebuilt CUDA 12.4 wheels from the project's wheel index. | |
| # --extra-index-url only ADDS an index: if PyPI carries a newer source | |
| # distribution than the newest cu124 wheel, pip would pick the sdist and compile | |
| # it without CUDA flags, silently producing a CPU-only build. | |
| # --prefer-binary makes pip choose the prebuilt wheel even when a newer sdist exists. | |
| RUN pip install --no-cache-dir --prefer-binary "llama-cpp-python[server]" \ | |
| --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cu124 | |
| # Text-to-speech: faster-qwen3-tts (CUDA graphs, 5-6x faster than qwen-tts). | |
| # qwen-tts 0.1.1 pins transformers==4.57.3 although its code uses 5.x symbols. | |
| # transformers stays at 4.57.3 and the 3 missing symbols are backported in tts.py; | |
| # that is simpler than upgrading transformers, which cascades into | |
| # torch/torchvision/RoPE breakage. | |
| # Step 1 installs the pinned transformers plus supporting libraries; | |
| # steps 2 and 3 add the TTS packages with --no-deps so they cannot alter those pins. | |
| RUN pip install --no-cache-dir \ | |
| "transformers==4.57.3" \ | |
| "accelerate>=1.12.0" \ | |
| librosa \ | |
| einops \ | |
| onnxruntime \ | |
| sox \ | |
| && pip install --no-cache-dir --no-deps "qwen-tts>=0.1.1" \ | |
| && pip install --no-cache-dir --no-deps "faster-qwen3-tts>=0.2.1" | |
| # Speaker verification for voice cloning (identifies target speaker). | |
| # Installing pyannote.audio can replace torch/torchvision/torchaudio with builds | |
| # from PyPI, leaving a set that no longer matches the CUDA 12.4 wheels installed earlier. | |
| # Re-install the exact cu124 release set afterwards. The versions are pinned because | |
| # an unpinned "--upgrade" may keep a newer PyPI build that pyannote.audio pulled in, | |
| # so the mismatch would not reliably be repaired. | |
| # NOTE(review): if the resolved pyannote.audio release requires a newer torch than | |
| # 2.6.0, pin pyannote.audio to a compatible release as well — confirm at build time. | |
| RUN pip install --no-cache-dir pyannote.audio && \ | |
| pip install --no-cache-dir --upgrade \ | |
| torch==2.6.0 torchvision==0.21.0 torchaudio==2.6.0 \ | |
| --index-url https://download.pytorch.org/whl/cu124 | |
| # Cleanup: delete any leftover pip cache and temporary pip build directories. | |
| # NOTE(review): every pip step in this file already passes --no-cache-dir, and files | |
| # removed in a separate RUN presumably still exist in the earlier image layers — | |
| # confirm whether this step frees any space. | |
| RUN rm -rf /root/.cache/pip /tmp/pip-* | |
| # Add the entrypoint script first and mark it executable, | |
| # then add the application package under /app/api/. | |
| COPY start.sh /app/start.sh | |
| RUN chmod +x /app/start.sh | |
| COPY api/ /app/api/ | |
| # Port probed by the health check below. | |
| EXPOSE 8000 | |
| # Probe /health every 30s (10s timeout); 5 consecutive failures mark the container unhealthy. | |
| # Models are downloaded on first boot, so the endpoint may not answer right away: | |
| # --start-period gives the container time to bootstrap, and probe failures during | |
| # that window do not count towards --retries. | |
| # NOTE(review): 5m is an estimate — tune it to the real first-boot download time. | |
| HEALTHCHECK --interval=30s --timeout=10s --start-period=5m --retries=5 \ | |
| CMD curl -f http://localhost:8000/health || exit 1 | |
| # Models downloaded on first boot (fast with hf_transfer) | |
| CMD ["/app/start.sh"] | |