Gemini-VideoGeneration

Sleeping

App Files Community

LehongWu committed 29 days ago

Commit

a4b1a9c

verified ·

1 parent: 6249f41

Upload folder using huggingface_hub

Browse files

Files changed (9) hide show

docs/SPEC_WEB_UI.md +1 -1
gen_image_image_cond.py +6 -2
gen_image_prompt_only.py +6 -2
web/backend/services/gemini_image.py +10 -2
web/backend/static/assets/index-l1IkTo6p.js +0 -0
web/backend/static/index.html +1 -1
web/config/generation_options.json +1 -0
web/frontend/src/pages/ImageGen.tsx +13 -4
web/frontend/src/pages/ToolsUpscale.tsx +1 -1

docs/SPEC_WEB_UI.md CHANGED Viewed

@@ -8,7 +8,7 @@ Image and video generation from prompts and optional reference images.
 ## AI 创作台
-**A. 图片** — 0–3 张参考图 + 提示词 → 一张图。思考强度三档（Flash minimal / Flash high / Pro high）；模型 ID 与 `thinking_level` 见后端。宽高比、分辨率见 `generation_options.json`。
 **B. 视频** — 0–3 张参考图 + 提示词 → 短视频。Veo 模型可配置；**Veo 3.1 Lite** 在配置中设 **`supports_reference_images: false`**。有参考图时时长 **8s**；仅文案时 4/6/8s（以 API 为准）。

 ## AI 创作台
+**A. 图片** — 0–3 张参考图 + 提示词 → 一张图。模型含 Nano Banana（`gemini-2.5-flash-image`，无长思考）、Nano Banana 2（`gemini-3.1-flash-image-preview`，可选长思考）、Nano Banana Pro（`gemini-3-pro-image-preview`，长思考）；默认选项为 Nano Banana 2 快速。宽高比、分辨率见 `generation_options.json`。
 **B. 视频** — 0–3 张参考图 + 提示词 → 短视频。Veo 模型可配置；**Veo 3.1 Lite** 在配置中设 **`supports_reference_images: false`**。有参考图时时长 **8s**；仅文案时 4/6/8s（以 API 为准）。

gen_image_image_cond.py CHANGED Viewed

@@ -78,8 +78,12 @@ def load_pil_image(image_path: Path) -> Image.Image:
 def build_image_config(args: argparse.Namespace) -> types.ImageConfig:
     kwargs: dict = {"aspect_ratio": args.aspect_ratio}
-    gemini3_models = {"gemini-3.1-flash-image-preview", "gemini-3-pro-image-preview"}
-    if args.model in gemini3_models:
         kwargs["image_size"] = args.resolution
     return types.ImageConfig(**kwargs)

 def build_image_config(args: argparse.Namespace) -> types.ImageConfig:
     kwargs: dict = {"aspect_ratio": args.aspect_ratio}
+    image_models_with_size = {
+        "gemini-2.5-flash-image",
+        "gemini-3.1-flash-image-preview",
+        "gemini-3-pro-image-preview",
+    }
+    if args.model in image_models_with_size:
         kwargs["image_size"] = args.resolution
     return types.ImageConfig(**kwargs)

gen_image_prompt_only.py CHANGED Viewed

@@ -56,8 +56,12 @@ def parse_args() -> argparse.Namespace:
 def build_image_config(args: argparse.Namespace) -> types.ImageConfig:
     kwargs: dict = {"aspect_ratio": args.aspect_ratio}
-    gemini3_models = {"gemini-3.1-flash-image-preview", "gemini-3-pro-image-preview"}
-    if args.model in gemini3_models:
         kwargs["image_size"] = args.resolution
     return types.ImageConfig(**kwargs)

 def build_image_config(args: argparse.Namespace) -> types.ImageConfig:
     kwargs: dict = {"aspect_ratio": args.aspect_ratio}
+    image_models_with_size = {
+        "gemini-2.5-flash-image",
+        "gemini-3.1-flash-image-preview",
+        "gemini-3-pro-image-preview",
+    }
+    if args.model in image_models_with_size:
         kwargs["image_size"] = args.resolution
     return types.ImageConfig(**kwargs)

web/backend/services/gemini_image.py CHANGED Viewed

@@ -9,7 +9,14 @@ from google import genai
 from google.genai import types
 from PIL import Image
-GEMINI3_IMAGE_MODELS = frozenset({"gemini-3.1-flash-image-preview", "gemini-3-pro-image-preview"})
 def build_image_config(
@@ -18,7 +25,7 @@ def build_image_config(
     resolution: str,
 ) -> types.ImageConfig:
     kwargs: dict = {"aspect_ratio": aspect_ratio}
-    if model in GEMINI3_IMAGE_MODELS:
         kwargs["image_size"] = resolution
     return types.ImageConfig(**kwargs)
@@ -46,6 +53,7 @@ def generate_image_bytes(
         "response_modalities": ["IMAGE"],
         "image_config": image_config,
     }
     if thinking_level:
         if model == "gemini-3.1-flash-image-preview":
             config_kwargs["thinking_config"] = types.ThinkingConfig(

 from google.genai import types
 from PIL import Image
+# Models that accept ImageConfig.image_size (1K / 2K / 4K) in the Gemini image API.
+IMAGE_MODELS_WITH_SIZE = frozenset(
+    {
+        "gemini-2.5-flash-image",
+        "gemini-3.1-flash-image-preview",
+        "gemini-3-pro-image-preview",
+    }
+)
 def build_image_config(
     resolution: str,
 ) -> types.ImageConfig:
     kwargs: dict = {"aspect_ratio": aspect_ratio}
+    if model in IMAGE_MODELS_WITH_SIZE:
         kwargs["image_size"] = resolution
     return types.ImageConfig(**kwargs)
         "response_modalities": ["IMAGE"],
         "image_config": image_config,
     }
+    # gemini-2.5-flash-image does not support thinking (API capability table).
     if thinking_level:
         if model == "gemini-3.1-flash-image-preview":
             config_kwargs["thinking_config"] = types.ThinkingConfig(

web/backend/static/assets/index-l1IkTo6p.js ADDED Viewed

The diff for this file is too large to render. See raw diff

web/backend/static/index.html CHANGED Viewed

@@ -53,7 +53,7 @@
         }
       })();
     </script>
-    <script type="module" crossorigin src="/assets/index-DKqfgLWk.js"></script>
     <link rel="stylesheet" crossorigin href="/assets/index-JnUJDL9j.css">
   </head>
   <body>

         }
       })();
     </script>
+    <script type="module" crossorigin src="/assets/index-l1IkTo6p.js"></script>
     <link rel="stylesheet" crossorigin href="/assets/index-JnUJDL9j.css">
   </head>
   <body>

web/config/generation_options.json CHANGED Viewed

@@ -1,6 +1,7 @@
 {
   "image": {
     "models": [
       { "value": "gemini-3.1-flash-image-preview", "label": "Nano Banana 2" },
       { "value": "gemini-3-pro-image-preview", "label": "Nano Banana Pro" }
     ],

 {
   "image": {
     "models": [
+      { "value": "gemini-2.5-flash-image", "label": "Nano Banana" },
       { "value": "gemini-3.1-flash-image-preview", "label": "Nano Banana 2" },
       { "value": "gemini-3-pro-image-preview", "label": "Nano Banana Pro" }
     ],

web/frontend/src/pages/ImageGen.tsx CHANGED Viewed

@@ -5,13 +5,17 @@ import { ElapsedTimer } from "../components/ElapsedTimer";
 import { ImageSlot } from "../components/ImageSlot";
 import { useGenerationOptions } from "../context/GenerationOptionsContext";
 const FLASH_ID = "gemini-3.1-flash-image-preview";
 const PRO_ID = "gemini-3-pro-image-preview";
-type ImageGenVariant = "flash-minimal" | "flash-high" | "pro-high";
 export function ImageGen() {
   const opts = useGenerationOptions();
   const flashLabel =
     opts.image.models.find((m) => m.value === FLASH_ID)?.label ?? "Nano Banana 2";
   const proLabel =
@@ -31,11 +35,12 @@ export function ImageGen() {
   const variantOptions = useMemo(
     () => [
       { id: "flash-minimal" as const, label: flashLabel },
       { id: "flash-high" as const, label: `${flashLabel}（长思考）` },
       { id: "pro-high" as const, label: `${proLabel}（长思考）` },
     ],
-    [flashLabel, proLabel],
   );
   async function onSubmit(e: FormEvent) {
@@ -49,6 +54,10 @@ export function ImageGen() {
     let model: string;
     let thinking: "minimal" | "high";
     switch (variant) {
       case "flash-minimal":
         model = FLASH_ID;
         thinking = "minimal";
@@ -101,7 +110,7 @@ export function ImageGen() {
           填写提示词；参考图最多三张。再选比例与分辨率。
         </p>
         <p className="text-xs text-mist leading-relaxed">
-          {flashLabel} 与 {proLabel} 为实验性，接口或效果可能变更。
         </p>
       </div>
@@ -124,7 +133,7 @@ export function ImageGen() {
               ))}
             </select>
             <p className="text-xs text-mist mt-1.5">
-              {flashLabel} 为默认快速选项；{flashLabel}（长思考）与 {proLabel}（长思考）为长思考模式，更慢、更细；{proLabel} 为 Pro 路线。
             </p>
           </div>

 import { ImageSlot } from "../components/ImageSlot";
 import { useGenerationOptions } from "../context/GenerationOptionsContext";
+const FLASH25_ID = "gemini-2.5-flash-image";
 const FLASH_ID = "gemini-3.1-flash-image-preview";
 const PRO_ID = "gemini-3-pro-image-preview";
+/** Order: older flash → Nano Banana 2 (default) → long-thinking → Pro */
+type ImageGenVariant = "nb25-minimal" | "flash-minimal" | "flash-high" | "pro-high";
 export function ImageGen() {
   const opts = useGenerationOptions();
+  const nano25Label =
+    opts.image.models.find((m) => m.value === FLASH25_ID)?.label ?? "Nano Banana";
   const flashLabel =
     opts.image.models.find((m) => m.value === FLASH_ID)?.label ?? "Nano Banana 2";
   const proLabel =
   const variantOptions = useMemo(
     () => [
+      { id: "nb25-minimal" as const, label: nano25Label },
       { id: "flash-minimal" as const, label: flashLabel },
       { id: "flash-high" as const, label: `${flashLabel}（长思考）` },
       { id: "pro-high" as const, label: `${proLabel}（长思考）` },
     ],
+    [nano25Label, flashLabel, proLabel],
   );
   async function onSubmit(e: FormEvent) {
     let model: string;
     let thinking: "minimal" | "high";
     switch (variant) {
+      case "nb25-minimal":
+        model = FLASH25_ID;
+        thinking = "minimal";
+        break;
       case "flash-minimal":
         model = FLASH_ID;
         thinking = "minimal";
           填写提示词；参考图最多三张。再选比例与分辨率。
         </p>
         <p className="text-xs text-mist leading-relaxed">
+          {nano25Label} 为稳定版；{flashLabel} 与 {proLabel} 为实验性，接口或效果可能变更。
         </p>
       </div>
               ))}
             </select>
             <p className="text-xs text-mist mt-1.5">
+              {nano25Label} 为上一代快速；{flashLabel} 为默认推荐；{flashLabel}（长思考）与 {proLabel}（长思考）为长思考模式，更慢、更细；{proLabel} 为 Pro 路线。
             </p>
           </div>

web/frontend/src/pages/ToolsUpscale.tsx CHANGED Viewed

@@ -158,7 +158,7 @@ export function ToolsUpscale() {
           原图必选。选图后宽高比自动匹配（可改）；分辨率默认 4K。比例差太远时会提示。
         </p>
         <p className="text-xs text-mist leading-relaxed">
-          下拉中的名称为配置所示；当前图片模型为实验性，接口或效果可能变更。
         </p>
       </div>

           原图必选。选图后宽高比自动匹配（可改）；分辨率默认 4K。比例差太远时会提示。
         </p>
         <p className="text-xs text-mist leading-relaxed">
+          下拉中的名称为配置所示；Nano Banana 为稳定版；Nano Banana 2 与 Nano Banana Pro 为实验性，接口或效果可能变更。
         </p>
       </div>