# babelcast-mistral / Dockerfile
# Origin: marcosremar2 — "Upload folder using huggingface_hub" (commit 1e91d30, verified)
# Base: RunPod's PyTorch image (Python 3.11, CUDA 12.4).
# PyTorch 2.6 stable supports sm_89 (RTX 4090) and sm_90 (H100);
# an RTX 5090 (sm_120 Blackwell) needs a different base image or a nightly build.
FROM runpod/pytorch:2.4.0-py3.11-cuda12.4.1-devel-ubuntu22.04

# Which LLM backend to bake in; overridable with --build-arg CONF_LLM_MODEL=...
ARG CONF_LLM_MODEL=mistral

# Runtime environment: unbuffered stdout/stderr, no .pyc files, selected model.
ENV PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1 \
    CONF_LLM_MODEL=${CONF_LLM_MODEL}

WORKDIR /app
# OS-level packages: audio tooling (ffmpeg/sox/libsndfile) plus curl for the
# health probe. Lists are cleaned in the same layer so they never hit the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
        curl \
        ffmpeg \
        libsndfile1 \
        libsox-dev \
        sox \
    && rm -rf /var/lib/apt/lists/*
# Upgrade PyTorch to 2.6 stable (CUDA 12.4 wheels; supports sm_89/sm_90).
# Versions are pinned: the original unpinned `--upgrade torch` would pull
# whatever release is newest at build time, silently invalidating the
# sm_89/sm_90 support this image documents. 0.21.0 / 2.6.0 are the
# torchvision / torchaudio releases paired with torch 2.6.0.
RUN pip install --no-cache-dir --upgrade \
        "torch==2.6.0" "torchvision==0.21.0" "torchaudio==2.6.0" \
        --index-url https://download.pytorch.org/whl/cu124
# Application Python dependencies: ASR (faster-whisper), web stack
# (FastAPI + uvicorn + websockets), audio I/O, and fast HF model downloads.
RUN pip install --no-cache-dir \
        "fastapi>=0.115.0" \
        "faster-whisper>=1.1.0" \
        hf_transfer \
        httpx \
        "huggingface-hub>=0.26.0" \
        numpy \
        "pydantic-settings>=2.0" \
        python-multipart \
        soundfile \
        "uvicorn[standard]>=0.32.0" \
        websockets
# Install llama-cpp-python with CUDA support.
# Step 1 pulls the prebuilt CUDA 12.4 wheel from the upstream wheel index
# (PyPI's sdist would compile here without CUDA enabled).
# Step 2 adds the [server] extra; the base requirement is already satisfied
# by the installed wheel, so only the extra's dependencies are fetched.
RUN pip install --no-cache-dir llama-cpp-python \
--extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cu124 && \
pip install --no-cache-dir "llama-cpp-python[server]"
# Install faster-qwen3-tts (CUDA graphs, 5-6x faster than qwen-tts).
# qwen-tts 0.1.1 pins transformers==4.57.3 but its code uses 5.x symbols.
# We keep 4.57.3 and backport the 3 missing symbols in tts.py (simpler than
# upgrading transformers which cascades into torch/torchvision/RoPE breakage).
# Ordering matters: the shared dependencies are installed first, then both
# TTS packages go in with --no-deps so their own metadata cannot drag in a
# conflicting dependency set on top of what was just pinned.
RUN pip install --no-cache-dir "transformers==4.57.3" "accelerate>=1.12.0" \
librosa einops onnxruntime sox && \
pip install --no-cache-dir --no-deps "qwen-tts>=0.1.1" && \
pip install --no-cache-dir --no-deps "faster-qwen3-tts>=0.2.1"
# Speaker verification for voice cloning (identifies target speaker).
# pyannote.audio pulls torchvision from PyPI (CPU-only), overriding the CUDA
# build, so the cu124 wheels are re-installed in the same layer. Versions are
# pinned to the 2.6.0 / 0.21.0 wheel set this image targets (the original
# unpinned `--upgrade` could drift to a different torch release over time).
# `--upgrade` is still required: the CPU build has the same public version as
# the +cu124 build, so without it pip would consider the pin already satisfied.
RUN pip install --no-cache-dir pyannote.audio && \
    pip install --no-cache-dir --upgrade \
        "torch==2.6.0" "torchvision==0.21.0" "torchaudio==2.6.0" \
        --index-url https://download.pytorch.org/whl/cu124
# Cleanup.
# NOTE(review): every pip call above already uses --no-cache-dir, so this is a
# belt-and-braces sweep of leftover temp files. Because it runs in its own
# layer, it cannot shrink data already committed by earlier layers — it only
# keeps the final filesystem view tidy.
RUN rm -rf /root/.cache/pip /tmp/pip-*
# Copy application code.
COPY api/ /app/api/
# Set the execute bit at copy time (BuildKit --chmod) instead of a follow-up
# `RUN chmod +x`, which would duplicate start.sh into an extra layer.
COPY --chmod=755 start.sh /app/start.sh
EXPOSE 8000
# Health probe for orchestrators. Models are downloaded on first boot (fast
# with hf_transfer), so --start-period gives the service a grace window in
# which failing probes do not count against the retry budget — without it the
# container could be flagged unhealthy while still fetching weights.
HEALTHCHECK --interval=30s --timeout=10s --start-period=180s --retries=5 \
CMD curl -f http://localhost:8000/health || exit 1
# Models downloaded on first boot (fast with hf_transfer)
CMD ["/app/start.sh"]