JSCPPProgrammer committed on
Commit
c994fd2
·
verified ·
1 Parent(s): 1fd52ef

Fix HF exec entrypoint: generate entrypoint.sh in Docker; body in entrypoint_body.sh

Browse files
Files changed (5) hide show
  1. Dockerfile +14 -5
  2. app.py +3 -3
  3. scripts/entrypoint.sh +0 -42
  4. scripts/entrypoint_body.sh +98 -0
  5. space_health.py +6 -3
Dockerfile CHANGED
@@ -15,10 +15,21 @@ COPY vendor/rllm /app/vendor/rllm
15
  COPY requirements.txt /app/requirements.txt
16
  COPY app.py space_gen.py space_health.py /app/
17
  COPY services /app/services
18
- COPY scripts /app/scripts
 
 
 
 
 
19
 
20
  ENV PYTHONPATH=/app/vendor/rllm
21
  ENV GRADIO_SERVER_PORT=7860
 
 
 
 
 
 
22
 
23
  RUN pip install --no-cache-dir --upgrade pip setuptools wheel \
24
  && pip install --no-cache-dir -e /app/vendor/rllm \
@@ -28,9 +39,7 @@ RUN pip install --no-cache-dir --upgrade pip setuptools wheel \
28
  ARG INSTALL_VLLM=1
29
  RUN if [ "$INSTALL_VLLM" = "1" ]; then pip install --no-cache-dir "vllm>=0.6.3"; fi
30
 
31
- # Strip Windows CRLF if present (avoids: /usr/bin/env: 'bash\r': No such file or directory)
32
- RUN sed -i 's/\r$//' /app/scripts/entrypoint.sh && chmod +x /app/scripts/entrypoint.sh
33
-
34
  EXPOSE 7860
35
 
36
- CMD ["/app/scripts/entrypoint.sh"]
 
 
15
  COPY requirements.txt /app/requirements.txt
16
  COPY app.py space_gen.py space_health.py /app/
17
  COPY services /app/services
18
+ COPY scripts/entrypoint_body.sh scripts/verify_env.py /app/scripts/
19
+ # HF may exec /app/scripts/entrypoint.sh directly — never COPY it from Windows (CRLF → "exec format error").
20
+ # Build a 2-line LF-only stub; normalize body script bytes inside Linux.
21
+ RUN python3 -c "import pathlib; p=pathlib.Path('/app/scripts/entrypoint_body.sh'); b=p.read_bytes(); b=b.lstrip(b'\xef\xbb\xbf'); b=b.replace(b'\r\n', b'\n').replace(b'\r', b''); p.write_bytes(b)" \
22
+ && printf '%s\n' '#!/bin/bash' 'exec /bin/bash /app/scripts/entrypoint_body.sh' > /app/scripts/entrypoint.sh \
23
+ && chmod +x /app/scripts/entrypoint.sh /app/scripts/entrypoint_body.sh
24
 
25
  ENV PYTHONPATH=/app/vendor/rllm
26
  ENV GRADIO_SERVER_PORT=7860
27
+ # HF Spaces / minimal images often have uid 1000 with no /etc/passwd entry; PyTorch Inductor calls
28
+ # getpass.getuser() and crashes with KeyError. USER/LOGNAME short-circuit getuser(); cache dirs avoid $HOME issues.
29
+ ENV USER=huggingface
30
+ ENV LOGNAME=huggingface
31
+ ENV TORCHINDUCTOR_CACHE_DIR=/tmp/torch_inductor_cache
32
+ ENV TRITON_CACHE_DIR=/tmp/triton_cache
33
 
34
  RUN pip install --no-cache-dir --upgrade pip setuptools wheel \
35
  && pip install --no-cache-dir -e /app/vendor/rllm \
 
39
  ARG INSTALL_VLLM=1
40
  RUN if [ "$INSTALL_VLLM" = "1" ]; then pip install --no-cache-dir "vllm>=0.6.3"; fi
41
 
 
 
 
42
  EXPOSE 7860
43
 
44
+ # PID 1 is bash (real ELF). HF may ignore this and exec entrypoint.sh; that file is generated above with valid shebang.
45
+ CMD ["/bin/bash", "/app/scripts/entrypoint_body.sh"]
app.py CHANGED
@@ -75,9 +75,9 @@ with gr.Blocks(title="GenSearcher + FireRed") as demo:
75
  "## GenSearcher + FireRed-Image-Edit-1.1\n"
76
  "Runs the **official** GenSearcher search/browse/image-search agent (vLLM), "
77
  "then generates with **FireRed** via the same `/generate` API as the Qwen edit server.\n\n"
78
- "**LLM (required):** a reachable **OpenAI-compatible** URL in `OPENAI_BASE_URL` (must include `/v1`) for "
79
- "[Gen-Searcher-8B](https://huggingface.co/GenSearcher/Gen-Searcher-8B), plus `BROWSE_SUMMARY_BASE_URL` when "
80
- "using browse summarization with `BROWSE_GENERATE_ENGINE=vllm` (see README).\n\n"
81
  "**Search / browse (optional keys):** without `SERPER_KEY_ID` and `JINA_API_KEYS`, the agent uses **DuckDuckGo** "
82
  "for web and image search and **direct HTTP** page fetch for visits. Set those secrets if you prefer Serper + Jina.\n\n"
83
  "**Connection errors:** On Hugging Face Spaces, `http://127.0.0.1:8002/v1` only works if you run vLLM "
 
75
  "## GenSearcher + FireRed-Image-Edit-1.1\n"
76
  "Runs the **official** GenSearcher search/browse/image-search agent (vLLM), "
77
  "then generates with **FireRed** via the same `/generate` API as the Qwen edit server.\n\n"
78
+ "**LLM:** Either run Gen-Searcher **in this same Space** (`START_VLLM_GENSEARCHER=1` vLLM on localhost; "
79
+ "no second Space), **or** set `OPENAI_BASE_URL` to an OpenAI-compatible **`…/v1`** endpoint. "
80
+ "Browse summarization needs `BROWSE_SUMMARY_BASE_URL` when `BROWSE_GENERATE_ENGINE=vllm` (see README).\n\n"
81
  "**Search / browse (optional keys):** without `SERPER_KEY_ID` and `JINA_API_KEYS`, the agent uses **DuckDuckGo** "
82
  "for web and image search and **direct HTTP** page fetch for visits. Set those secrets if you prefer Serper + Jina.\n\n"
83
  "**Connection errors:** On Hugging Face Spaces, `http://127.0.0.1:8002/v1` only works if you run vLLM "
scripts/entrypoint.sh DELETED
@@ -1,42 +0,0 @@
1
- # Hugging Face Space (Docker) — GenSearcher + FireRed
2
- # Requires GPU. For multi-GPU full-local mode, set START_VLLM_*=1 and CUDA device envs in README.
3
-
4
- FROM pytorch/pytorch:2.5.1-cuda12.4-cudnn9-runtime
5
-
6
- ENV DEBIAN_FRONTEND=noninteractive
7
- RUN apt-get update && apt-get install -y --no-install-recommends \
8
- curl \
9
- git \
10
- && rm -rf /var/lib/apt/lists/*
11
-
12
- WORKDIR /app
13
-
14
- COPY vendor/rllm /app/vendor/rllm
15
- COPY requirements.txt /app/requirements.txt
16
- COPY app.py space_gen.py space_health.py /app/
17
- COPY services /app/services
18
- COPY scripts /app/scripts
19
- # CRLF here causes "exec format error" when the container execs the script; strip immediately after copy.
20
- RUN sed -i 's/\r$//' /app/scripts/entrypoint.sh && chmod +x /app/scripts/entrypoint.sh
21
-
22
- ENV PYTHONPATH=/app/vendor/rllm
23
- ENV GRADIO_SERVER_PORT=7860
24
- # HF Spaces / minimal images often have uid 1000 with no /etc/passwd entry; PyTorch Inductor calls
25
- # getpass.getuser() and crashes with KeyError. USER/LOGNAME short-circuit getuser(); cache dirs avoid $HOME issues.
26
- ENV USER=huggingface
27
- ENV LOGNAME=huggingface
28
- ENV TORCHINDUCTOR_CACHE_DIR=/tmp/torch_inductor_cache
29
- ENV TRITON_CACHE_DIR=/tmp/triton_cache
30
-
31
- RUN pip install --no-cache-dir --upgrade pip setuptools wheel \
32
- && pip install --no-cache-dir -e /app/vendor/rllm \
33
- && pip install --no-cache-dir -r /app/requirements.txt
34
-
35
- # Optional: local vLLM inside the image (large). Disable with build-arg if you only use external APIs.
36
- ARG INSTALL_VLLM=1
37
- RUN if [ "$INSTALL_VLLM" = "1" ]; then pip install --no-cache-dir "vllm>=0.6.3"; fi
38
-
39
- EXPOSE 7860
40
-
41
- # Invoke via bash so a bad shebang/CRLF cannot produce "exec format error" from the kernel.
42
- CMD ["/bin/bash", "/app/scripts/entrypoint.sh"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
scripts/entrypoint_body.sh ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Main startup logic (run as: bash /app/scripts/entrypoint_body.sh).
2
+ # entrypoint.sh is generated in the Dockerfile so HF can exec it without CRLF/BOM issues.
3
+ set -euo pipefail
4
+ cd /app
5
+
6
+ # Same-container vLLM: PyTorch may call getpass.getuser() before USER is set in some runtimes.
7
+ export USER="${USER:-huggingface}"
8
+ export LOGNAME="${LOGNAME:-$USER}"
9
+ export TORCHINDUCTOR_CACHE_DIR="${TORCHINDUCTOR_CACHE_DIR:-/tmp/torch_inductor_cache}"
10
+ export TRITON_CACHE_DIR="${TRITON_CACHE_DIR:-/tmp/triton_cache}"
11
+
12
+ export PYTHONPATH="/app/vendor/rllm:${PYTHONPATH:-}"
13
+
14
+ # Optional: load Space secrets copied to this path
15
+ if [[ -f /app/.env.gen_image ]]; then
16
+ set -a
17
+ # shellcheck source=/dev/null
18
+ source /app/.env.gen_image
19
+ set +a
20
+ fi
21
+
22
+ if [[ "${START_VLLM_GENSEARCHER:-0}" != "1" ]]; then
23
+ case "${OPENAI_BASE_URL:-}" in
24
+ *127.0.0.1*|*localhost*)
25
+ echo "[entrypoint] WARNING: OPENAI_BASE_URL points to loopback but START_VLLM_GENSEARCHER is not 1."
26
+ echo "[entrypoint] The GenSearcher agent will get 'Connection error' unless a server listens here,"
27
+ echo "[entrypoint] or you set OPENAI_BASE_URL to an external OpenAI-compatible URL (ending in /v1)."
28
+ ;;
29
+ esac
30
+ if [[ -z "${OPENAI_BASE_URL:-}" ]]; then
31
+ echo "[entrypoint] OPENAI_BASE_URL is unset. For GenSearcher **inside this Space only**, set Space variable"
32
+ echo "[entrypoint] START_VLLM_GENSEARCHER=1 (entrypoint will start vLLM here and set OPENAI_BASE_URL to loopback)."
33
+ fi
34
+ fi
35
+
36
+ wait_http() {
37
+ local url=$1
38
+ local name=$2
39
+ local max_attempts=${3:-90}
40
+ local i=0
41
+ echo "[entrypoint] Waiting for ${name} (${url})..."
42
+ until curl -sf "$url" >/dev/null 2>&1; do
43
+ i=$((i + 1))
44
+ if [[ $i -ge $max_attempts ]]; then
45
+ echo "[entrypoint] Timeout waiting for ${name}"
46
+ exit 1
47
+ fi
48
+ sleep 2
49
+ done
50
+ echo "[entrypoint] ${name} is up."
51
+ }
52
+
53
+ # Defaults: only FireRed + Gradio in-container. Point OPENAI_BASE_URL / BROWSE_SUMMARY_BASE_URL
54
+ # to your vLLM (or other OpenAI-compatible) endpoints via Space secrets.
55
+
56
+ # --- Optional local vLLM: GenSearcher-8B (OpenAI-compatible) ---
57
+ if [[ "${START_VLLM_GENSEARCHER:-0}" == "1" ]]; then
58
+ CUDA_VISIBLE_DEVICES="${GENSEARCHER_CUDA_VISIBLE_DEVICES:-0}" \
59
+ vllm serve "${GENSEARCHER_MODEL_ID:-GenSearcher/Gen-Searcher-8B}" \
60
+ --host 0.0.0.0 \
61
+ --port 8002 \
62
+ --tensor-parallel-size "${GENSEARCHER_TP:-1}" \
63
+ --gpu-memory-utilization "${VLLM_GPU_MEMORY_UTIL:-0.85}" \
64
+ --served-model-name "${GEN_EVAL_MODEL:-Gen-Searcher-8B}" \
65
+ --max-model-len "${GENSEARCHER_MAX_MODEL_LEN:-65536}" \
66
+ --no-enable-prefix-caching &
67
+ wait_http "http://127.0.0.1:8002/v1/models" "GenSearcher vLLM"
68
+ export OPENAI_BASE_URL="${OPENAI_BASE_URL:-http://127.0.0.1:8002/v1}"
69
+ fi
70
+
71
+ # --- Optional local vLLM: browse summarization (Qwen3-VL) ---
72
+ if [[ "${START_VLLM_BROWSE:-0}" == "1" ]]; then
73
+ export BROWSE_GENERATE_ENGINE=vllm
74
+ CUDA_VISIBLE_DEVICES="${BROWSE_CUDA_VISIBLE_DEVICES:-1}" \
75
+ vllm serve "${BROWSE_MODEL_ID:-Qwen/Qwen3-VL-30B-A3B-Instruct}" \
76
+ --host 0.0.0.0 \
77
+ --port 8003 \
78
+ --tensor-parallel-size "${BROWSE_TP:-1}" \
79
+ --gpu-memory-utilization "${VLLM_GPU_MEMORY_UTIL:-0.85}" \
80
+ --served-model-name "${BROWSE_SUMMARY_MODEL:-Qwen3-VL-30B-A3B-Instruct}" \
81
+ --max-model-len "${BROWSE_MAX_MODEL_LEN:-65536}" \
82
+ --mm-processor-cache-gb 0 \
83
+ --no-enable-prefix-caching &
84
+ wait_http "http://127.0.0.1:8003/v1/models" "Browse-summary vLLM"
85
+ export BROWSE_SUMMARY_BASE_URL="${BROWSE_SUMMARY_BASE_URL:-http://127.0.0.1:8003/v1}"
86
+ fi
87
+
88
+ # --- FireRed adapter (GenSearcher /generate contract) ---
89
+ if [[ "${START_FIRERED_API:-1}" == "1" ]]; then
90
+ CUDA_VISIBLE_DEVICES="${FIRERED_CUDA_VISIBLE_DEVICES:-0}" \
91
+ python -m uvicorn services.firered_generate:app --host 0.0.0.0 --port 8765 &
92
+ wait_http "http://127.0.0.1:8765/health" "FireRed API" 120
93
+ export QWEN_EDIT_APP_URL="${QWEN_EDIT_APP_URL:-http://127.0.0.1:8765}"
94
+ else
95
+ echo "[entrypoint] START_FIRERED_API=0 — use external QWEN_EDIT_APP_URL for generation."
96
+ fi
97
+
98
+ exec python app.py
space_health.py CHANGED
@@ -53,9 +53,12 @@ def llm_endpoint_status() -> str:
53
 
54
  if not gen_base:
55
  lines.append(
56
- "**GenSearcher LLM:** `OPENAI_BASE_URL` is **not set**. "
57
- "Add a Space secret pointing to an OpenAI-compatible server that serves **GenSearcher/Gen-Searcher-8B** "
58
- "(e.g. your own vLLM URL ending in `/v1`)."
 
 
 
59
  )
60
  else:
61
  ok, msg = check_v1_models(gen_base, gen_key)
 
53
 
54
  if not gen_base:
55
  lines.append(
56
+ "**GenSearcher LLM:** `OPENAI_BASE_URL` is **not set**.\n\n"
57
+ "- **All compute in this Space (recommended for your case):** add a Space variable "
58
+ "`START_VLLM_GENSEARCHER=1` (and enough GPU). The entrypoint starts **vLLM for Gen-Searcher-8B inside this "
59
+ "same container** and sets `OPENAI_BASE_URL` to `http://127.0.0.1:8002/v1`. That is still **this Space** — "
60
+ "not a second Hugging Face Space. The app talks to vLLM over **localhost** inside the container (normal for vLLM).\n\n"
61
+ "- **Or** set `OPENAI_BASE_URL` yourself to any OpenAI-compatible **`…/v1`** URL (only if the model runs elsewhere).\n"
62
  )
63
  else:
64
  ok, msg = check_v1_models(gen_base, gen_key)