JSCPPProgrammer committed on
Commit
c994fd2
·
verified ·
1 Parent(s): 1fd52ef

Fix HF exec entrypoint: generate entrypoint.sh in Docker; body in entrypoint_body.sh

Browse files
Files changed (5) hide show
  1. Dockerfile +14 -5
  2. app.py +3 -3
  3. scripts/entrypoint.sh +0 -42
  4. scripts/entrypoint_body.sh +98 -0
  5. space_health.py +6 -3
Dockerfile CHANGED
@@ -15,10 +15,21 @@ COPY vendor/rllm /app/vendor/rllm
15
  COPY requirements.txt /app/requirements.txt
16
  COPY app.py space_gen.py space_health.py /app/
17
  COPY services /app/services
18
- COPY scripts /app/scripts
 
 
 
 
 
19
 
20
  ENV PYTHONPATH=/app/vendor/rllm
21
  ENV GRADIO_SERVER_PORT=7860
 
 
 
 
 
 
22
 
23
  RUN pip install --no-cache-dir --upgrade pip setuptools wheel \
24
  && pip install --no-cache-dir -e /app/vendor/rllm \
@@ -28,9 +39,7 @@ RUN pip install --no-cache-dir --upgrade pip setuptools wheel \
28
  ARG INSTALL_VLLM=1
29
  RUN if [ "$INSTALL_VLLM" = "1" ]; then pip install --no-cache-dir "vllm>=0.6.3"; fi
30
 
31
- # Strip Windows CRLF if present (avoids: /usr/bin/env: 'bash\r': No such file or directory)
32
- RUN sed -i 's/\r$//' /app/scripts/entrypoint.sh && chmod +x /app/scripts/entrypoint.sh
33
-
34
  EXPOSE 7860
35
 
36
- CMD ["/app/scripts/entrypoint.sh"]
 
 
15
  COPY requirements.txt /app/requirements.txt
16
  COPY app.py space_gen.py space_health.py /app/
17
  COPY services /app/services
18
+ COPY scripts/entrypoint_body.sh scripts/verify_env.py /app/scripts/
19
+ # HF may exec /app/scripts/entrypoint.sh directly — never COPY it from Windows (CRLF → "exec format error").
20
+ # Build a 2-line LF-only stub; normalize body script bytes inside Linux.
21
+ RUN python3 -c "import pathlib; p=pathlib.Path('/app/scripts/entrypoint_body.sh'); b=p.read_bytes(); b=b.lstrip(b'\xef\xbb\xbf'); b=b.replace(b'\r\n', b'\n').replace(b'\r', b''); p.write_bytes(b)" \
22
+ && printf '%s\n' '#!/bin/bash' 'exec /bin/bash /app/scripts/entrypoint_body.sh' > /app/scripts/entrypoint.sh \
23
+ && chmod +x /app/scripts/entrypoint.sh /app/scripts/entrypoint_body.sh
24
 
25
  ENV PYTHONPATH=/app/vendor/rllm
26
  ENV GRADIO_SERVER_PORT=7860
27
+ # HF Spaces / minimal images often have uid 1000 with no /etc/passwd entry; PyTorch Inductor calls
28
+ # getpass.getuser() and crashes with KeyError. USER/LOGNAME short-circuit getuser(); cache dirs avoid $HOME issues.
29
+ ENV USER=huggingface
30
+ ENV LOGNAME=huggingface
31
+ ENV TORCHINDUCTOR_CACHE_DIR=/tmp/torch_inductor_cache
32
+ ENV TRITON_CACHE_DIR=/tmp/triton_cache
33
 
34
  RUN pip install --no-cache-dir --upgrade pip setuptools wheel \
35
  && pip install --no-cache-dir -e /app/vendor/rllm \
 
39
  ARG INSTALL_VLLM=1
40
  RUN if [ "$INSTALL_VLLM" = "1" ]; then pip install --no-cache-dir "vllm>=0.6.3"; fi
41
 
 
 
 
42
  EXPOSE 7860
43
 
44
+ # PID 1 is bash (real ELF). HF may ignore this and exec entrypoint.sh; that file is generated above with valid shebang.
45
+ CMD ["/bin/bash", "/app/scripts/entrypoint_body.sh"]
app.py CHANGED
@@ -75,9 +75,9 @@ with gr.Blocks(title="GenSearcher + FireRed") as demo:
75
  "## GenSearcher + FireRed-Image-Edit-1.1\n"
76
  "Runs the **official** GenSearcher search/browse/image-search agent (vLLM), "
77
  "then generates with **FireRed** via the same `/generate` API as the Qwen edit server.\n\n"
78
- "**LLM (required):** a reachable **OpenAI-compatible** URL in `OPENAI_BASE_URL` (must include `/v1`) for "
79
- "[Gen-Searcher-8B](https://huggingface.co/GenSearcher/Gen-Searcher-8B), plus `BROWSE_SUMMARY_BASE_URL` when "
80
- "using browse summarization with `BROWSE_GENERATE_ENGINE=vllm` (see README).\n\n"
81
  "**Search / browse (optional keys):** without `SERPER_KEY_ID` and `JINA_API_KEYS`, the agent uses **DuckDuckGo** "
82
  "for web and image search and **direct HTTP** page fetch for visits. Set those secrets if you prefer Serper + Jina.\n\n"
83
  "**Connection errors:** On Hugging Face Spaces, `http://127.0.0.1:8002/v1` only works if you run vLLM "
 
75
  "## GenSearcher + FireRed-Image-Edit-1.1\n"
76
  "Runs the **official** GenSearcher search/browse/image-search agent (vLLM), "
77
  "then generates with **FireRed** via the same `/generate` API as the Qwen edit server.\n\n"
78
+ "**LLM:** Either run Gen-Searcher **in this same Space** (`START_VLLM_GENSEARCHER=1` vLLM on localhost; "
79
+ "no second Space), **or** set `OPENAI_BASE_URL` to an OpenAI-compatible **`…/v1`** endpoint. "
80
+ "Browse summarization needs `BROWSE_SUMMARY_BASE_URL` when `BROWSE_GENERATE_ENGINE=vllm` (see README).\n\n"
81
  "**Search / browse (optional keys):** without `SERPER_KEY_ID` and `JINA_API_KEYS`, the agent uses **DuckDuckGo** "
82
  "for web and image search and **direct HTTP** page fetch for visits. Set those secrets if you prefer Serper + Jina.\n\n"
83
  "**Connection errors:** On Hugging Face Spaces, `http://127.0.0.1:8002/v1` only works if you run vLLM "
scripts/entrypoint.sh DELETED
@@ -1,42 +0,0 @@
1
- # Hugging Face Space (Docker) — GenSearcher + FireRed
2
- # Requires GPU. For multi-GPU full-local mode, set START_VLLM_*=1 and CUDA device envs in README.
3
-
4
- FROM pytorch/pytorch:2.5.1-cuda12.4-cudnn9-runtime
5
-
6
- ENV DEBIAN_FRONTEND=noninteractive
7
- RUN apt-get update && apt-get install -y --no-install-recommends \
8
- curl \
9
- git \
10
- && rm -rf /var/lib/apt/lists/*
11
-
12
- WORKDIR /app
13
-
14
- COPY vendor/rllm /app/vendor/rllm
15
- COPY requirements.txt /app/requirements.txt
16
- COPY app.py space_gen.py space_health.py /app/
17
- COPY services /app/services
18
- COPY scripts /app/scripts
19
- # CRLF here causes "exec format error" when the container execs the script; strip immediately after copy.
20
- RUN sed -i 's/\r$//' /app/scripts/entrypoint.sh && chmod +x /app/scripts/entrypoint.sh
21
-
22
- ENV PYTHONPATH=/app/vendor/rllm
23
- ENV GRADIO_SERVER_PORT=7860
24
- # HF Spaces / minimal images often have uid 1000 with no /etc/passwd entry; PyTorch Inductor calls
25
- # getpass.getuser() and crashes with KeyError. USER/LOGNAME short-circuit getuser(); cache dirs avoid $HOME issues.
26
- ENV USER=huggingface
27
- ENV LOGNAME=huggingface
28
- ENV TORCHINDUCTOR_CACHE_DIR=/tmp/torch_inductor_cache
29
- ENV TRITON_CACHE_DIR=/tmp/triton_cache
30
-
31
- RUN pip install --no-cache-dir --upgrade pip setuptools wheel \
32
- && pip install --no-cache-dir -e /app/vendor/rllm \
33
- && pip install --no-cache-dir -r /app/requirements.txt
34
-
35
- # Optional: local vLLM inside the image (large). Disable with build-arg if you only use external APIs.
36
- ARG INSTALL_VLLM=1
37
- RUN if [ "$INSTALL_VLLM" = "1" ]; then pip install --no-cache-dir "vllm>=0.6.3"; fi
38
-
39
- EXPOSE 7860
40
-
41
- # Invoke via bash so a bad shebang/CRLF cannot produce "exec format error" from the kernel.
42
- CMD ["/bin/bash", "/app/scripts/entrypoint.sh"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
scripts/entrypoint_body.sh ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Main startup logic (run as: bash /app/scripts/entrypoint_body.sh).
2
+ # entrypoint.sh is generated in the Dockerfile so HF can exec it without CRLF/BOM issues.
3
+ set -euo pipefail
4
+ cd /app
5
+
6
+ # Same-container vLLM: PyTorch may call getpass.getuser() before USER is set in some runtimes.
7
+ export USER="${USER:-huggingface}"
8
+ export LOGNAME="${LOGNAME:-$USER}"
9
+ export TORCHINDUCTOR_CACHE_DIR="${TORCHINDUCTOR_CACHE_DIR:-/tmp/torch_inductor_cache}"
10
+ export TRITON_CACHE_DIR="${TRITON_CACHE_DIR:-/tmp/triton_cache}"
11
+
12
+ export PYTHONPATH="/app/vendor/rllm:${PYTHONPATH:-}"
13
+
14
+ # Optional: load Space secrets copied to this path
15
+ if [[ -f /app/.env.gen_image ]]; then
16
+ set -a
17
+ # shellcheck source=/dev/null
18
+ source /app/.env.gen_image
19
+ set +a
20
+ fi
21
+
22
+ if [[ "${START_VLLM_GENSEARCHER:-0}" != "1" ]]; then
23
+ case "${OPENAI_BASE_URL:-}" in
24
+ *127.0.0.1*|*localhost*)
25
+ echo "[entrypoint] WARNING: OPENAI_BASE_URL points to loopback but START_VLLM_GENSEARCHER is not 1."
26
+ echo "[entrypoint] The GenSearcher agent will get 'Connection error' unless a server listens here,"
27
+ echo "[entrypoint] or you set OPENAI_BASE_URL to an external OpenAI-compatible URL (ending in /v1)."
28
+ ;;
29
+ esac
30
+ if [[ -z "${OPENAI_BASE_URL:-}" ]]; then
31
+ echo "[entrypoint] OPENAI_BASE_URL is unset. For GenSearcher **inside this Space only**, set Space variable"
32
+ echo "[entrypoint] START_VLLM_GENSEARCHER=1 (entrypoint will start vLLM here and set OPENAI_BASE_URL to loopback)."
33
+ fi
34
+ fi
35
+
36
+ wait_http() {
37
+ local url=$1
38
+ local name=$2
39
+ local max_attempts=${3:-90}
40
+ local i=0
41
+ echo "[entrypoint] Waiting for ${name} (${url})..."
42
+ until curl -sf "$url" >/dev/null 2>&1; do
43
+ i=$((i + 1))
44
+ if [[ $i -ge $max_attempts ]]; then
45
+ echo "[entrypoint] Timeout waiting for ${name}"
46
+ exit 1
47
+ fi
48
+ sleep 2
49
+ done
50
+ echo "[entrypoint] ${name} is up."
51
+ }
52
+
53
+ # Defaults: only FireRed + Gradio in-container. Point OPENAI_BASE_URL / BROWSE_SUMMARY_BASE_URL
54
+ # to your vLLM (or other OpenAI-compatible) endpoints via Space secrets.
55
+
56
+ # --- Optional local vLLM: GenSearcher-8B (OpenAI-compatible) ---
57
+ if [[ "${START_VLLM_GENSEARCHER:-0}" == "1" ]]; then
58
+ CUDA_VISIBLE_DEVICES="${GENSEARCHER_CUDA_VISIBLE_DEVICES:-0}" \
59
+ vllm serve "${GENSEARCHER_MODEL_ID:-GenSearcher/Gen-Searcher-8B}" \
60
+ --host 0.0.0.0 \
61
+ --port 8002 \
62
+ --tensor-parallel-size "${GENSEARCHER_TP:-1}" \
63
+ --gpu-memory-utilization "${VLLM_GPU_MEMORY_UTIL:-0.85}" \
64
+ --served-model-name "${GEN_EVAL_MODEL:-Gen-Searcher-8B}" \
65
+ --max-model-len "${GENSEARCHER_MAX_MODEL_LEN:-65536}" \
66
+ --no-enable-prefix-caching &
67
+ wait_http "http://127.0.0.1:8002/v1/models" "GenSearcher vLLM"
68
+ export OPENAI_BASE_URL="${OPENAI_BASE_URL:-http://127.0.0.1:8002/v1}"
69
+ fi
70
+
71
+ # --- Optional local vLLM: browse summarization (Qwen3-VL) ---
72
+ if [[ "${START_VLLM_BROWSE:-0}" == "1" ]]; then
73
+ export BROWSE_GENERATE_ENGINE=vllm
74
+ CUDA_VISIBLE_DEVICES="${BROWSE_CUDA_VISIBLE_DEVICES:-1}" \
75
+ vllm serve "${BROWSE_MODEL_ID:-Qwen/Qwen3-VL-30B-A3B-Instruct}" \
76
+ --host 0.0.0.0 \
77
+ --port 8003 \
78
+ --tensor-parallel-size "${BROWSE_TP:-1}" \
79
+ --gpu-memory-utilization "${VLLM_GPU_MEMORY_UTIL:-0.85}" \
80
+ --served-model-name "${BROWSE_SUMMARY_MODEL:-Qwen3-VL-30B-A3B-Instruct}" \
81
+ --max-model-len "${BROWSE_MAX_MODEL_LEN:-65536}" \
82
+ --mm-processor-cache-gb 0 \
83
+ --no-enable-prefix-caching &
84
+ wait_http "http://127.0.0.1:8003/v1/models" "Browse-summary vLLM"
85
+ export BROWSE_SUMMARY_BASE_URL="${BROWSE_SUMMARY_BASE_URL:-http://127.0.0.1:8003/v1}"
86
+ fi
87
+
88
+ # --- FireRed adapter (GenSearcher /generate contract) ---
89
+ if [[ "${START_FIRERED_API:-1}" == "1" ]]; then
90
+ CUDA_VISIBLE_DEVICES="${FIRERED_CUDA_VISIBLE_DEVICES:-0}" \
91
+ python -m uvicorn services.firered_generate:app --host 0.0.0.0 --port 8765 &
92
+ wait_http "http://127.0.0.1:8765/health" "FireRed API" 120
93
+ export QWEN_EDIT_APP_URL="${QWEN_EDIT_APP_URL:-http://127.0.0.1:8765}"
94
+ else
95
+ echo "[entrypoint] START_FIRERED_API=0 — use external QWEN_EDIT_APP_URL for generation."
96
+ fi
97
+
98
+ exec python app.py
space_health.py CHANGED
@@ -53,9 +53,12 @@ def llm_endpoint_status() -> str:
53
 
54
  if not gen_base:
55
  lines.append(
56
- "**GenSearcher LLM:** `OPENAI_BASE_URL` is **not set**. "
57
- "Add a Space secret pointing to an OpenAI-compatible server that serves **GenSearcher/Gen-Searcher-8B** "
58
- "(e.g. your own vLLM URL ending in `/v1`)."
 
 
 
59
  )
60
  else:
61
  ok, msg = check_v1_models(gen_base, gen_key)
 
53
 
54
  if not gen_base:
55
  lines.append(
56
+ "**GenSearcher LLM:** `OPENAI_BASE_URL` is **not set**.\n\n"
57
+ "- **All compute in this Space (recommended for your case):** add a Space variable "
58
+ "`START_VLLM_GENSEARCHER=1` (and enough GPU). The entrypoint starts **vLLM for Gen-Searcher-8B inside this "
59
+ "same container** and sets `OPENAI_BASE_URL` to `http://127.0.0.1:8002/v1`. That is still **this Space** — "
60
+ "not a second Hugging Face Space. The app talks to vLLM over **localhost** inside the container (normal for vLLM).\n\n"
61
+ "- **Or** set `OPENAI_BASE_URL` yourself to any OpenAI-compatible **`…/v1`** URL (only if the model runs elsewhere).\n"
62
  )
63
  else:
64
  ok, msg = check_v1_models(gen_base, gen_key)