LehongWu committed on
Commit
a4b1a9c
·
verified ·
1 Parent(s): 6249f41

Upload folder using huggingface_hub

Browse files
docs/SPEC_WEB_UI.md CHANGED
@@ -8,7 +8,7 @@ Image and video generation from prompts and optional reference images.
8
 
9
  ## AI 创作台
10
 
11
- **A. 图片** — 0–3 张参考图 + 提示词 → 一张图。思考强度三档(Flash minimal / Flash high / Pro high);模型 ID `thinking_level` 见后端。宽高比、分辨率见 `generation_options.json`。
12
 
13
  **B. 视频** — 0–3 张参考图 + 提示词 → 短视频。Veo 模型可配置;**Veo 3.1 Lite** 在配置中设 **`supports_reference_images: false`**。有参考图时时长 **8s**;仅文案时 4/6/8s(以 API 为准)。
14
 
 
8
 
9
  ## AI 创作台
10
 
11
+ **A. 图片** — 0–3 张参考图 + 提示词 → 一张图。模型含 Nano Banana(`gemini-2.5-flash-image`,无长思考)、Nano Banana 2(`gemini-3.1-flash-image-preview`,可选长思考)、Nano Banana Pro(`gemini-3-pro-image-preview`,长思考);默认选项为 Nano Banana 2 快速。宽高比、分辨率见 `generation_options.json`。
12
 
13
  **B. 视频** — 0–3 张参考图 + 提示词 → 短视频。Veo 模型可配置;**Veo 3.1 Lite** 在配置中设 **`supports_reference_images: false`**。有参考图时时长 **8s**;仅文案时 4/6/8s(以 API 为准)。
14
 
gen_image_image_cond.py CHANGED
@@ -78,8 +78,12 @@ def load_pil_image(image_path: Path) -> Image.Image:
78
 
79
  def build_image_config(args: argparse.Namespace) -> types.ImageConfig:
80
  kwargs: dict = {"aspect_ratio": args.aspect_ratio}
81
- gemini3_models = {"gemini-3.1-flash-image-preview", "gemini-3-pro-image-preview"}
82
- if args.model in gemini3_models:
 
 
 
 
83
  kwargs["image_size"] = args.resolution
84
  return types.ImageConfig(**kwargs)
85
 
 
78
 
79
  def build_image_config(args: argparse.Namespace) -> types.ImageConfig:
80
  kwargs: dict = {"aspect_ratio": args.aspect_ratio}
81
+ image_models_with_size = {
82
+ "gemini-2.5-flash-image",
83
+ "gemini-3.1-flash-image-preview",
84
+ "gemini-3-pro-image-preview",
85
+ }
86
+ if args.model in image_models_with_size:
87
  kwargs["image_size"] = args.resolution
88
  return types.ImageConfig(**kwargs)
89
 
gen_image_prompt_only.py CHANGED
@@ -56,8 +56,12 @@ def parse_args() -> argparse.Namespace:
56
 
57
  def build_image_config(args: argparse.Namespace) -> types.ImageConfig:
58
  kwargs: dict = {"aspect_ratio": args.aspect_ratio}
59
- gemini3_models = {"gemini-3.1-flash-image-preview", "gemini-3-pro-image-preview"}
60
- if args.model in gemini3_models:
 
 
 
 
61
  kwargs["image_size"] = args.resolution
62
  return types.ImageConfig(**kwargs)
63
 
 
56
 
57
  def build_image_config(args: argparse.Namespace) -> types.ImageConfig:
58
  kwargs: dict = {"aspect_ratio": args.aspect_ratio}
59
+ image_models_with_size = {
60
+ "gemini-2.5-flash-image",
61
+ "gemini-3.1-flash-image-preview",
62
+ "gemini-3-pro-image-preview",
63
+ }
64
+ if args.model in image_models_with_size:
65
  kwargs["image_size"] = args.resolution
66
  return types.ImageConfig(**kwargs)
67
 
web/backend/services/gemini_image.py CHANGED
@@ -9,7 +9,14 @@ from google import genai
9
  from google.genai import types
10
  from PIL import Image
11
 
12
- GEMINI3_IMAGE_MODELS = frozenset({"gemini-3.1-flash-image-preview", "gemini-3-pro-image-preview"})
 
 
 
 
 
 
 
13
 
14
 
15
  def build_image_config(
@@ -18,7 +25,7 @@ def build_image_config(
18
  resolution: str,
19
  ) -> types.ImageConfig:
20
  kwargs: dict = {"aspect_ratio": aspect_ratio}
21
- if model in GEMINI3_IMAGE_MODELS:
22
  kwargs["image_size"] = resolution
23
  return types.ImageConfig(**kwargs)
24
 
@@ -46,6 +53,7 @@ def generate_image_bytes(
46
  "response_modalities": ["IMAGE"],
47
  "image_config": image_config,
48
  }
 
49
  if thinking_level:
50
  if model == "gemini-3.1-flash-image-preview":
51
  config_kwargs["thinking_config"] = types.ThinkingConfig(
 
9
  from google.genai import types
10
  from PIL import Image
11
 
12
+ # Models that accept ImageConfig.image_size (1K / 2K / 4K) in the Gemini image API.
13
+ IMAGE_MODELS_WITH_SIZE = frozenset(
14
+ {
15
+ "gemini-2.5-flash-image",
16
+ "gemini-3.1-flash-image-preview",
17
+ "gemini-3-pro-image-preview",
18
+ }
19
+ )
20
 
21
 
22
  def build_image_config(
 
25
  resolution: str,
26
  ) -> types.ImageConfig:
27
  kwargs: dict = {"aspect_ratio": aspect_ratio}
28
+ if model in IMAGE_MODELS_WITH_SIZE:
29
  kwargs["image_size"] = resolution
30
  return types.ImageConfig(**kwargs)
31
 
 
53
  "response_modalities": ["IMAGE"],
54
  "image_config": image_config,
55
  }
56
+ # gemini-2.5-flash-image does not support thinking (API capability table).
57
  if thinking_level:
58
  if model == "gemini-3.1-flash-image-preview":
59
  config_kwargs["thinking_config"] = types.ThinkingConfig(
web/backend/static/assets/index-l1IkTo6p.js ADDED
The diff for this file is too large to render. See raw diff
 
web/backend/static/index.html CHANGED
@@ -53,7 +53,7 @@
53
  }
54
  })();
55
  </script>
56
- <script type="module" crossorigin src="/assets/index-DKqfgLWk.js"></script>
57
  <link rel="stylesheet" crossorigin href="/assets/index-JnUJDL9j.css">
58
  </head>
59
  <body>
 
53
  }
54
  })();
55
  </script>
56
+ <script type="module" crossorigin src="/assets/index-l1IkTo6p.js"></script>
57
  <link rel="stylesheet" crossorigin href="/assets/index-JnUJDL9j.css">
58
  </head>
59
  <body>
web/config/generation_options.json CHANGED
@@ -1,6 +1,7 @@
1
  {
2
  "image": {
3
  "models": [
 
4
  { "value": "gemini-3.1-flash-image-preview", "label": "Nano Banana 2" },
5
  { "value": "gemini-3-pro-image-preview", "label": "Nano Banana Pro" }
6
  ],
 
1
  {
2
  "image": {
3
  "models": [
4
+ { "value": "gemini-2.5-flash-image", "label": "Nano Banana" },
5
  { "value": "gemini-3.1-flash-image-preview", "label": "Nano Banana 2" },
6
  { "value": "gemini-3-pro-image-preview", "label": "Nano Banana Pro" }
7
  ],
web/frontend/src/pages/ImageGen.tsx CHANGED
@@ -5,13 +5,17 @@ import { ElapsedTimer } from "../components/ElapsedTimer";
5
  import { ImageSlot } from "../components/ImageSlot";
6
  import { useGenerationOptions } from "../context/GenerationOptionsContext";
7
 
 
8
  const FLASH_ID = "gemini-3.1-flash-image-preview";
9
  const PRO_ID = "gemini-3-pro-image-preview";
10
 
11
- type ImageGenVariant = "flash-minimal" | "flash-high" | "pro-high";
 
12
 
13
  export function ImageGen() {
14
  const opts = useGenerationOptions();
 
 
15
  const flashLabel =
16
  opts.image.models.find((m) => m.value === FLASH_ID)?.label ?? "Nano Banana 2";
17
  const proLabel =
@@ -31,11 +35,12 @@ export function ImageGen() {
31
 
32
  const variantOptions = useMemo(
33
  () => [
 
34
  { id: "flash-minimal" as const, label: flashLabel },
35
  { id: "flash-high" as const, label: `${flashLabel}(长思考)` },
36
  { id: "pro-high" as const, label: `${proLabel}(长思考)` },
37
  ],
38
- [flashLabel, proLabel],
39
  );
40
 
41
  async function onSubmit(e: FormEvent) {
@@ -49,6 +54,10 @@ export function ImageGen() {
49
  let model: string;
50
  let thinking: "minimal" | "high";
51
  switch (variant) {
 
 
 
 
52
  case "flash-minimal":
53
  model = FLASH_ID;
54
  thinking = "minimal";
@@ -101,7 +110,7 @@ export function ImageGen() {
101
  填写提示词;参考图最多三张。再选比例与分辨率。
102
  </p>
103
  <p className="text-xs text-mist leading-relaxed">
104
- {flashLabel} 与 {proLabel} 为实验性,接口或效果可能变更。
105
  </p>
106
  </div>
107
 
@@ -124,7 +133,7 @@ export function ImageGen() {
124
  ))}
125
  </select>
126
  <p className="text-xs text-mist mt-1.5">
127
- {flashLabel} 为默认快速选项;{flashLabel}(长思考)与 {proLabel}(长思考)为长思考模式,更慢、更细;{proLabel} 为 Pro 路线。
128
  </p>
129
  </div>
130
 
 
5
  import { ImageSlot } from "../components/ImageSlot";
6
  import { useGenerationOptions } from "../context/GenerationOptionsContext";
7
 
8
+ const FLASH25_ID = "gemini-2.5-flash-image";
9
  const FLASH_ID = "gemini-3.1-flash-image-preview";
10
  const PRO_ID = "gemini-3-pro-image-preview";
11
 
12
+ /** Order: older flash (Nano Banana) → Nano Banana 2 (default) → Nano Banana 2 long-thinking → Pro long-thinking */
13
+ type ImageGenVariant = "nb25-minimal" | "flash-minimal" | "flash-high" | "pro-high";
14
 
15
  export function ImageGen() {
16
  const opts = useGenerationOptions();
17
+ const nano25Label =
18
+ opts.image.models.find((m) => m.value === FLASH25_ID)?.label ?? "Nano Banana";
19
  const flashLabel =
20
  opts.image.models.find((m) => m.value === FLASH_ID)?.label ?? "Nano Banana 2";
21
  const proLabel =
 
35
 
36
  const variantOptions = useMemo(
37
  () => [
38
+ { id: "nb25-minimal" as const, label: nano25Label },
39
  { id: "flash-minimal" as const, label: flashLabel },
40
  { id: "flash-high" as const, label: `${flashLabel}(长思考)` },
41
  { id: "pro-high" as const, label: `${proLabel}(长思考)` },
42
  ],
43
+ [nano25Label, flashLabel, proLabel],
44
  );
45
 
46
  async function onSubmit(e: FormEvent) {
 
54
  let model: string;
55
  let thinking: "minimal" | "high";
56
  switch (variant) {
57
+ case "nb25-minimal":
58
+ model = FLASH25_ID;
59
+ thinking = "minimal";
60
+ break;
61
  case "flash-minimal":
62
  model = FLASH_ID;
63
  thinking = "minimal";
 
110
  填写提示词;参考图最多三张。再选比例与分辨率。
111
  </p>
112
  <p className="text-xs text-mist leading-relaxed">
113
+ {nano25Label} 为稳定版;{flashLabel} 与 {proLabel} 为实验性,接口或效果可能变更。
114
  </p>
115
  </div>
116
 
 
133
  ))}
134
  </select>
135
  <p className="text-xs text-mist mt-1.5">
136
+ {nano25Label} 为上一代快速;{flashLabel} 为默认推荐;{flashLabel}(长思考)与 {proLabel}(长思考)为长思考模式,更慢、更细;{proLabel} 为 Pro 路线。
137
  </p>
138
  </div>
139
 
web/frontend/src/pages/ToolsUpscale.tsx CHANGED
@@ -158,7 +158,7 @@ export function ToolsUpscale() {
158
  原图必选。选图后宽高比自动匹配(可改);分辨率默认 4K。比例差太远时会提示。
159
  </p>
160
  <p className="text-xs text-mist leading-relaxed">
161
- 下拉中的名称为配置所示;当前图片模型为实验性,接口或效果可能变更。
162
  </p>
163
  </div>
164
 
 
158
  原图必选。选图后宽高比自动匹配(可改);分辨率默认 4K。比例差太远时会提示。
159
  </p>
160
  <p className="text-xs text-mist leading-relaxed">
161
+ 下拉中的名称为配置所示;Nano Banana 稳定版;Nano Banana 2 与 Nano Banana Pro 为实验性,接口或效果可能变更。
162
  </p>
163
  </div>
164