Fix HF exec entrypoint: generate entrypoint.sh in Docker; body in entrypoint_body.sh
Browse files- Dockerfile +14 -5
- app.py +3 -3
- scripts/entrypoint.sh +0 -42
- scripts/entrypoint_body.sh +98 -0
- space_health.py +6 -3
Dockerfile
CHANGED
|
@@ -15,10 +15,21 @@ COPY vendor/rllm /app/vendor/rllm
|
|
| 15 |
COPY requirements.txt /app/requirements.txt
|
| 16 |
COPY app.py space_gen.py space_health.py /app/
|
| 17 |
COPY services /app/services
|
| 18 |
-
COPY scripts /app/scripts
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19 |
|
| 20 |
ENV PYTHONPATH=/app/vendor/rllm
|
| 21 |
ENV GRADIO_SERVER_PORT=7860
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
|
| 23 |
RUN pip install --no-cache-dir --upgrade pip setuptools wheel \
|
| 24 |
&& pip install --no-cache-dir -e /app/vendor/rllm \
|
|
@@ -28,9 +39,7 @@ RUN pip install --no-cache-dir --upgrade pip setuptools wheel \
|
|
| 28 |
ARG INSTALL_VLLM=1
|
| 29 |
RUN if [ "$INSTALL_VLLM" = "1" ]; then pip install --no-cache-dir "vllm>=0.6.3"; fi
|
| 30 |
|
| 31 |
-
# Strip Windows CRLF if present (avoids: /usr/bin/env: 'bash\r': No such file or directory)
|
| 32 |
-
RUN sed -i 's/\r$//' /app/scripts/entrypoint.sh && chmod +x /app/scripts/entrypoint.sh
|
| 33 |
-
|
| 34 |
EXPOSE 7860
|
| 35 |
|
| 36 |
-
|
|
|
|
|
|
| 15 |
COPY requirements.txt /app/requirements.txt
|
| 16 |
COPY app.py space_gen.py space_health.py /app/
|
| 17 |
COPY services /app/services
|
| 18 |
+
COPY scripts/entrypoint_body.sh scripts/verify_env.py /app/scripts/
|
| 19 |
+
# HF may exec /app/scripts/entrypoint.sh directly — never COPY it from Windows (CRLF → "exec format error").
|
| 20 |
+
# Build a 2-line LF-only stub; normalize body script bytes inside Linux.
|
| 21 |
+
# Strip only an exact leading UTF-8 BOM (bytes.lstrip would strip any run of those three byte values), then CRLF -> LF and drop stray CRs.
RUN python3 -c "import pathlib; p=pathlib.Path('/app/scripts/entrypoint_body.sh'); b=p.read_bytes(); b=b[3:] if b.startswith(b'\xef\xbb\xbf') else b; b=b.replace(b'\r\n', b'\n').replace(b'\r', b''); p.write_bytes(b)" \
|
| 22 |
+
&& printf '%s\n' '#!/bin/bash' 'exec /bin/bash /app/scripts/entrypoint_body.sh' > /app/scripts/entrypoint.sh \
|
| 23 |
+
&& chmod +x /app/scripts/entrypoint.sh /app/scripts/entrypoint_body.sh
|
| 24 |
|
| 25 |
ENV PYTHONPATH=/app/vendor/rllm
|
| 26 |
ENV GRADIO_SERVER_PORT=7860
|
| 27 |
+
# HF Spaces / minimal images often have uid 1000 with no /etc/passwd entry; PyTorch Inductor calls
|
| 28 |
+
# getpass.getuser() and crashes with KeyError. USER/LOGNAME short-circuit getuser(); cache dirs avoid $HOME issues.
|
| 29 |
+
ENV USER=huggingface
|
| 30 |
+
ENV LOGNAME=huggingface
|
| 31 |
+
ENV TORCHINDUCTOR_CACHE_DIR=/tmp/torch_inductor_cache
|
| 32 |
+
ENV TRITON_CACHE_DIR=/tmp/triton_cache
|
| 33 |
|
| 34 |
RUN pip install --no-cache-dir --upgrade pip setuptools wheel \
|
| 35 |
&& pip install --no-cache-dir -e /app/vendor/rllm \
|
|
|
|
| 39 |
ARG INSTALL_VLLM=1
|
| 40 |
RUN if [ "$INSTALL_VLLM" = "1" ]; then pip install --no-cache-dir "vllm>=0.6.3"; fi
|
| 41 |
|
|
|
|
|
|
|
|
|
|
| 42 |
EXPOSE 7860
|
| 43 |
|
| 44 |
+
# PID 1 is bash (a real ELF binary). HF may ignore this CMD and exec entrypoint.sh directly; that file is generated above with a valid shebang.
|
| 45 |
+
CMD ["/bin/bash", "/app/scripts/entrypoint_body.sh"]
|
app.py
CHANGED
|
@@ -75,9 +75,9 @@ with gr.Blocks(title="GenSearcher + FireRed") as demo:
|
|
| 75 |
"## GenSearcher + FireRed-Image-Edit-1.1\n"
|
| 76 |
"Runs the **official** GenSearcher search/browse/image-search agent (vLLM), "
|
| 77 |
"then generates with **FireRed** via the same `/generate` API as the Qwen edit server.\n\n"
|
| 78 |
-
"**LLM
|
| 79 |
-
"
|
| 80 |
-
"
|
| 81 |
"**Search / browse (optional keys):** without `SERPER_KEY_ID` and `JINA_API_KEYS`, the agent uses **DuckDuckGo** "
|
| 82 |
"for web and image search and **direct HTTP** page fetch for visits. Set those secrets if you prefer Serper + Jina.\n\n"
|
| 83 |
"**Connection errors:** On Hugging Face Spaces, `http://127.0.0.1:8002/v1` only works if you run vLLM "
|
|
|
|
| 75 |
"## GenSearcher + FireRed-Image-Edit-1.1\n"
|
| 76 |
"Runs the **official** GenSearcher search/browse/image-search agent (vLLM), "
|
| 77 |
"then generates with **FireRed** via the same `/generate` API as the Qwen edit server.\n\n"
|
| 78 |
+
"**LLM:** Either run Gen-Searcher **in this same Space** (`START_VLLM_GENSEARCHER=1` → vLLM on localhost; "
|
| 79 |
+
"no second Space), **or** set `OPENAI_BASE_URL` to an OpenAI-compatible **`…/v1`** endpoint. "
|
| 80 |
+
"Browse summarization needs `BROWSE_SUMMARY_BASE_URL` when `BROWSE_GENERATE_ENGINE=vllm` (see README).\n\n"
|
| 81 |
"**Search / browse (optional keys):** without `SERPER_KEY_ID` and `JINA_API_KEYS`, the agent uses **DuckDuckGo** "
|
| 82 |
"for web and image search and **direct HTTP** page fetch for visits. Set those secrets if you prefer Serper + Jina.\n\n"
|
| 83 |
"**Connection errors:** On Hugging Face Spaces, `http://127.0.0.1:8002/v1` only works if you run vLLM "
|
scripts/entrypoint.sh
DELETED
|
@@ -1,42 +0,0 @@
|
|
| 1 |
-
# Hugging Face Space (Docker) — GenSearcher + FireRed
|
| 2 |
-
# Requires GPU. For multi-GPU full-local mode, set START_VLLM_*=1 and CUDA device envs in README.
|
| 3 |
-
|
| 4 |
-
FROM pytorch/pytorch:2.5.1-cuda12.4-cudnn9-runtime
|
| 5 |
-
|
| 6 |
-
ENV DEBIAN_FRONTEND=noninteractive
|
| 7 |
-
RUN apt-get update && apt-get install -y --no-install-recommends \
|
| 8 |
-
curl \
|
| 9 |
-
git \
|
| 10 |
-
&& rm -rf /var/lib/apt/lists/*
|
| 11 |
-
|
| 12 |
-
WORKDIR /app
|
| 13 |
-
|
| 14 |
-
COPY vendor/rllm /app/vendor/rllm
|
| 15 |
-
COPY requirements.txt /app/requirements.txt
|
| 16 |
-
COPY app.py space_gen.py space_health.py /app/
|
| 17 |
-
COPY services /app/services
|
| 18 |
-
COPY scripts /app/scripts
|
| 19 |
-
# CRLF here causes "exec format error" when the container execs the script; strip immediately after copy.
|
| 20 |
-
RUN sed -i 's/\r$//' /app/scripts/entrypoint.sh && chmod +x /app/scripts/entrypoint.sh
|
| 21 |
-
|
| 22 |
-
ENV PYTHONPATH=/app/vendor/rllm
|
| 23 |
-
ENV GRADIO_SERVER_PORT=7860
|
| 24 |
-
# HF Spaces / minimal images often have uid 1000 with no /etc/passwd entry; PyTorch Inductor calls
|
| 25 |
-
# getpass.getuser() and crashes with KeyError. USER/LOGNAME short-circuit getuser(); cache dirs avoid $HOME issues.
|
| 26 |
-
ENV USER=huggingface
|
| 27 |
-
ENV LOGNAME=huggingface
|
| 28 |
-
ENV TORCHINDUCTOR_CACHE_DIR=/tmp/torch_inductor_cache
|
| 29 |
-
ENV TRITON_CACHE_DIR=/tmp/triton_cache
|
| 30 |
-
|
| 31 |
-
RUN pip install --no-cache-dir --upgrade pip setuptools wheel \
|
| 32 |
-
&& pip install --no-cache-dir -e /app/vendor/rllm \
|
| 33 |
-
&& pip install --no-cache-dir -r /app/requirements.txt
|
| 34 |
-
|
| 35 |
-
# Optional: local vLLM inside the image (large). Disable with build-arg if you only use external APIs.
|
| 36 |
-
ARG INSTALL_VLLM=1
|
| 37 |
-
RUN if [ "$INSTALL_VLLM" = "1" ]; then pip install --no-cache-dir "vllm>=0.6.3"; fi
|
| 38 |
-
|
| 39 |
-
EXPOSE 7860
|
| 40 |
-
|
| 41 |
-
# Invoke via bash so a bad shebang/CRLF cannot produce "exec format error" from the kernel.
|
| 42 |
-
CMD ["/bin/bash", "/app/scripts/entrypoint.sh"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
scripts/entrypoint_body.sh
ADDED
|
@@ -0,0 +1,98 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Main startup logic (run as: bash /app/scripts/entrypoint_body.sh).
|
| 2 |
+
# entrypoint.sh is generated in the Dockerfile so HF can exec it without CRLF/BOM issues.
|
| 3 |
+
set -euo pipefail
|
| 4 |
+
cd /app
|
| 5 |
+
|
| 6 |
+
# Same-container vLLM: PyTorch may call getpass.getuser() before USER is set in some runtimes.
|
| 7 |
+
export USER="${USER:-huggingface}"
|
| 8 |
+
export LOGNAME="${LOGNAME:-$USER}"
|
| 9 |
+
export TORCHINDUCTOR_CACHE_DIR="${TORCHINDUCTOR_CACHE_DIR:-/tmp/torch_inductor_cache}"
|
| 10 |
+
export TRITON_CACHE_DIR="${TRITON_CACHE_DIR:-/tmp/triton_cache}"
|
| 11 |
+
|
| 12 |
+
export PYTHONPATH="/app/vendor/rllm:${PYTHONPATH:-}"
|
| 13 |
+
|
| 14 |
+
# Optional: load Space secrets copied to this path
|
| 15 |
+
if [[ -f /app/.env.gen_image ]]; then
|
| 16 |
+
set -a
|
| 17 |
+
# shellcheck source=/dev/null
|
| 18 |
+
source /app/.env.gen_image
|
| 19 |
+
set +a
|
| 20 |
+
fi
|
| 21 |
+
|
| 22 |
+
if [[ "${START_VLLM_GENSEARCHER:-0}" != "1" ]]; then
|
| 23 |
+
case "${OPENAI_BASE_URL:-}" in
|
| 24 |
+
*127.0.0.1*|*localhost*)
|
| 25 |
+
echo "[entrypoint] WARNING: OPENAI_BASE_URL points to loopback but START_VLLM_GENSEARCHER is not 1."
|
| 26 |
+
echo "[entrypoint] The GenSearcher agent will get 'Connection error' unless a server listens here,"
|
| 27 |
+
echo "[entrypoint] or you set OPENAI_BASE_URL to an external OpenAI-compatible URL (ending in /v1)."
|
| 28 |
+
;;
|
| 29 |
+
esac
|
| 30 |
+
if [[ -z "${OPENAI_BASE_URL:-}" ]]; then
|
| 31 |
+
echo "[entrypoint] OPENAI_BASE_URL is unset. For GenSearcher **inside this Space only**, set Space variable"
|
| 32 |
+
echo "[entrypoint] START_VLLM_GENSEARCHER=1 (entrypoint will start vLLM here and set OPENAI_BASE_URL to loopback)."
|
| 33 |
+
fi
|
| 34 |
+
fi
|
| 35 |
+
|
| 36 |
+
wait_http() {
|
| 37 |
+
local url=$1
|
| 38 |
+
local name=$2
|
| 39 |
+
local max_attempts=${3:-90}
|
| 40 |
+
local i=0
|
| 41 |
+
echo "[entrypoint] Waiting for ${name} (${url})..."
|
| 42 |
+
until curl -sf "$url" >/dev/null 2>&1; do
|
| 43 |
+
i=$((i + 1))
|
| 44 |
+
if [[ $i -ge $max_attempts ]]; then
|
| 45 |
+
echo "[entrypoint] Timeout waiting for ${name}"
|
| 46 |
+
exit 1
|
| 47 |
+
fi
|
| 48 |
+
sleep 2
|
| 49 |
+
done
|
| 50 |
+
echo "[entrypoint] ${name} is up."
|
| 51 |
+
}
|
| 52 |
+
|
| 53 |
+
# Defaults: only FireRed + Gradio in-container. Point OPENAI_BASE_URL / BROWSE_SUMMARY_BASE_URL
|
| 54 |
+
# to your vLLM (or other OpenAI-compatible) endpoints via Space secrets.
|
| 55 |
+
|
| 56 |
+
# --- Optional local vLLM: GenSearcher-8B (OpenAI-compatible) ---
|
| 57 |
+
if [[ "${START_VLLM_GENSEARCHER:-0}" == "1" ]]; then
|
| 58 |
+
CUDA_VISIBLE_DEVICES="${GENSEARCHER_CUDA_VISIBLE_DEVICES:-0}" \
|
| 59 |
+
vllm serve "${GENSEARCHER_MODEL_ID:-GenSearcher/Gen-Searcher-8B}" \
|
| 60 |
+
--host 0.0.0.0 \
|
| 61 |
+
--port 8002 \
|
| 62 |
+
--tensor-parallel-size "${GENSEARCHER_TP:-1}" \
|
| 63 |
+
--gpu-memory-utilization "${VLLM_GPU_MEMORY_UTIL:-0.85}" \
|
| 64 |
+
--served-model-name "${GEN_EVAL_MODEL:-Gen-Searcher-8B}" \
|
| 65 |
+
--max-model-len "${GENSEARCHER_MAX_MODEL_LEN:-65536}" \
|
| 66 |
+
--no-enable-prefix-caching &
|
| 67 |
+
wait_http "http://127.0.0.1:8002/v1/models" "GenSearcher vLLM"
|
| 68 |
+
export OPENAI_BASE_URL="${OPENAI_BASE_URL:-http://127.0.0.1:8002/v1}"
|
| 69 |
+
fi
|
| 70 |
+
|
| 71 |
+
# --- Optional local vLLM: browse summarization (Qwen3-VL) ---
|
| 72 |
+
if [[ "${START_VLLM_BROWSE:-0}" == "1" ]]; then
|
| 73 |
+
export BROWSE_GENERATE_ENGINE=vllm
|
| 74 |
+
CUDA_VISIBLE_DEVICES="${BROWSE_CUDA_VISIBLE_DEVICES:-1}" \
|
| 75 |
+
vllm serve "${BROWSE_MODEL_ID:-Qwen/Qwen3-VL-30B-A3B-Instruct}" \
|
| 76 |
+
--host 0.0.0.0 \
|
| 77 |
+
--port 8003 \
|
| 78 |
+
--tensor-parallel-size "${BROWSE_TP:-1}" \
|
| 79 |
+
--gpu-memory-utilization "${VLLM_GPU_MEMORY_UTIL:-0.85}" \
|
| 80 |
+
--served-model-name "${BROWSE_SUMMARY_MODEL:-Qwen3-VL-30B-A3B-Instruct}" \
|
| 81 |
+
--max-model-len "${BROWSE_MAX_MODEL_LEN:-65536}" \
|
| 82 |
+
--mm-processor-cache-gb 0 \
|
| 83 |
+
--no-enable-prefix-caching &
|
| 84 |
+
wait_http "http://127.0.0.1:8003/v1/models" "Browse-summary vLLM"
|
| 85 |
+
export BROWSE_SUMMARY_BASE_URL="${BROWSE_SUMMARY_BASE_URL:-http://127.0.0.1:8003/v1}"
|
| 86 |
+
fi
|
| 87 |
+
|
| 88 |
+
# --- FireRed adapter (GenSearcher /generate contract) ---
|
| 89 |
+
if [[ "${START_FIRERED_API:-1}" == "1" ]]; then
|
| 90 |
+
CUDA_VISIBLE_DEVICES="${FIRERED_CUDA_VISIBLE_DEVICES:-0}" \
|
| 91 |
+
python -m uvicorn services.firered_generate:app --host 0.0.0.0 --port 8765 &
|
| 92 |
+
wait_http "http://127.0.0.1:8765/health" "FireRed API" 120
|
| 93 |
+
export QWEN_EDIT_APP_URL="${QWEN_EDIT_APP_URL:-http://127.0.0.1:8765}"
|
| 94 |
+
else
|
| 95 |
+
echo "[entrypoint] START_FIRERED_API=0 — use external QWEN_EDIT_APP_URL for generation."
|
| 96 |
+
fi
|
| 97 |
+
|
| 98 |
+
exec python app.py
|
space_health.py
CHANGED
|
@@ -53,9 +53,12 @@ def llm_endpoint_status() -> str:
|
|
| 53 |
|
| 54 |
if not gen_base:
|
| 55 |
lines.append(
|
| 56 |
-
"**GenSearcher LLM:** `OPENAI_BASE_URL` is **not set**.
|
| 57 |
-
"
|
| 58 |
-
"(
|
|
|
|
|
|
|
|
|
|
| 59 |
)
|
| 60 |
else:
|
| 61 |
ok, msg = check_v1_models(gen_base, gen_key)
|
|
|
|
| 53 |
|
| 54 |
if not gen_base:
|
| 55 |
lines.append(
|
| 56 |
+
"**GenSearcher LLM:** `OPENAI_BASE_URL` is **not set**.\n\n"
|
| 57 |
+
"- **All compute in this Space (recommended for your case):** add a Space variable "
|
| 58 |
+
"`START_VLLM_GENSEARCHER=1` (and enough GPU). The entrypoint starts **vLLM for Gen-Searcher-8B inside this "
|
| 59 |
+
"same container** and sets `OPENAI_BASE_URL` to `http://127.0.0.1:8002/v1`. That is still **this Space** — "
|
| 60 |
+
"not a second Hugging Face Space. The app talks to vLLM over **localhost** inside the container (normal for vLLM).\n\n"
|
| 61 |
+
"- **Or** set `OPENAI_BASE_URL` yourself to any OpenAI-compatible **`…/v1`** URL (only if the model runs elsewhere).\n"
|
| 62 |
)
|
| 63 |
else:
|
| 64 |
ok, msg = check_v1_models(gen_base, gen_key)
|