Spaces: Sleeping
Upload folder using huggingface_hub
Browse files
- docs/SPEC_WEB_UI.md +1 -1
- gen_image_image_cond.py +6 -2
- gen_image_prompt_only.py +6 -2
- web/backend/services/gemini_image.py +10 -2
- web/backend/static/assets/index-l1IkTo6p.js +0 -0
- web/backend/static/index.html +1 -1
- web/config/generation_options.json +1 -0
- web/frontend/src/pages/ImageGen.tsx +13 -4
- web/frontend/src/pages/ToolsUpscale.tsx +1 -1
docs/SPEC_WEB_UI.md
CHANGED
|
@@ -8,7 +8,7 @@ Image and video generation from prompts and optional reference images.
|
|
| 8 |
|
| 9 |
## AI 创作台
|
| 10 |
|
| 11 |
-
**A. 图片** — 0–3 张参考图 + 提示词 → 一张图。思考
|
| 12 |
|
| 13 |
**B. 视频** — 0–3 张参考图 + 提示词 → 短视频。Veo 模型可配置;**Veo 3.1 Lite** 在配置中设 **`supports_reference_images: false`**。有参考图时时长 **8s**;仅文案时 4/6/8s(以 API 为准)。
|
| 14 |
|
|
|
|
| 8 |
|
| 9 |
## AI 创作台
|
| 10 |
|
| 11 |
+
**A. 图片** — 0–3 张参考图 + 提示词 → 一张图。模型含 Nano Banana(`gemini-2.5-flash-image`,无长思考)、Nano Banana 2(`gemini-3.1-flash-image-preview`,可选长思考)、Nano Banana Pro(`gemini-3-pro-image-preview`,长思考);默认选项为 Nano Banana 2 快速。宽高比、分辨率见 `generation_options.json`。
|
| 12 |
|
| 13 |
**B. 视频** — 0–3 张参考图 + 提示词 → 短视频。Veo 模型可配置;**Veo 3.1 Lite** 在配置中设 **`supports_reference_images: false`**。有参考图时时长 **8s**;仅文案时 4/6/8s(以 API 为准)。
|
| 14 |
|
gen_image_image_cond.py
CHANGED
|
@@ -78,8 +78,12 @@ def load_pil_image(image_path: Path) -> Image.Image:
|
|
| 78 |
|
| 79 |
def build_image_config(args: argparse.Namespace) -> types.ImageConfig:
|
| 80 |
kwargs: dict = {"aspect_ratio": args.aspect_ratio}
|
| 81 |
-
|
| 82 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 83 |
kwargs["image_size"] = args.resolution
|
| 84 |
return types.ImageConfig(**kwargs)
|
| 85 |
|
|
|
|
| 78 |
|
| 79 |
def build_image_config(args: argparse.Namespace) -> types.ImageConfig:
|
| 80 |
kwargs: dict = {"aspect_ratio": args.aspect_ratio}
|
| 81 |
+
image_models_with_size = {
|
| 82 |
+
"gemini-2.5-flash-image",
|
| 83 |
+
"gemini-3.1-flash-image-preview",
|
| 84 |
+
"gemini-3-pro-image-preview",
|
| 85 |
+
}
|
| 86 |
+
if args.model in image_models_with_size:
|
| 87 |
kwargs["image_size"] = args.resolution
|
| 88 |
return types.ImageConfig(**kwargs)
|
| 89 |
|
gen_image_prompt_only.py
CHANGED
|
@@ -56,8 +56,12 @@ def parse_args() -> argparse.Namespace:
|
|
| 56 |
|
| 57 |
def build_image_config(args: argparse.Namespace) -> types.ImageConfig:
|
| 58 |
kwargs: dict = {"aspect_ratio": args.aspect_ratio}
|
| 59 |
-
|
| 60 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 61 |
kwargs["image_size"] = args.resolution
|
| 62 |
return types.ImageConfig(**kwargs)
|
| 63 |
|
|
|
|
| 56 |
|
| 57 |
def build_image_config(args: argparse.Namespace) -> types.ImageConfig:
|
| 58 |
kwargs: dict = {"aspect_ratio": args.aspect_ratio}
|
| 59 |
+
image_models_with_size = {
|
| 60 |
+
"gemini-2.5-flash-image",
|
| 61 |
+
"gemini-3.1-flash-image-preview",
|
| 62 |
+
"gemini-3-pro-image-preview",
|
| 63 |
+
}
|
| 64 |
+
if args.model in image_models_with_size:
|
| 65 |
kwargs["image_size"] = args.resolution
|
| 66 |
return types.ImageConfig(**kwargs)
|
| 67 |
|
web/backend/services/gemini_image.py
CHANGED
|
@@ -9,7 +9,14 @@ from google import genai
|
|
| 9 |
from google.genai import types
|
| 10 |
from PIL import Image
|
| 11 |
|
| 12 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
|
| 14 |
|
| 15 |
def build_image_config(
|
|
@@ -18,7 +25,7 @@ def build_image_config(
|
|
| 18 |
resolution: str,
|
| 19 |
) -> types.ImageConfig:
|
| 20 |
kwargs: dict = {"aspect_ratio": aspect_ratio}
|
| 21 |
-
if model in
|
| 22 |
kwargs["image_size"] = resolution
|
| 23 |
return types.ImageConfig(**kwargs)
|
| 24 |
|
|
@@ -46,6 +53,7 @@ def generate_image_bytes(
|
|
| 46 |
"response_modalities": ["IMAGE"],
|
| 47 |
"image_config": image_config,
|
| 48 |
}
|
|
|
|
| 49 |
if thinking_level:
|
| 50 |
if model == "gemini-3.1-flash-image-preview":
|
| 51 |
config_kwargs["thinking_config"] = types.ThinkingConfig(
|
|
|
|
| 9 |
from google.genai import types
|
| 10 |
from PIL import Image
|
| 11 |
|
| 12 |
+
# Models that accept ImageConfig.image_size (1K / 2K / 4K) in the Gemini image API.
|
| 13 |
+
IMAGE_MODELS_WITH_SIZE = frozenset(
|
| 14 |
+
{
|
| 15 |
+
"gemini-2.5-flash-image",
|
| 16 |
+
"gemini-3.1-flash-image-preview",
|
| 17 |
+
"gemini-3-pro-image-preview",
|
| 18 |
+
}
|
| 19 |
+
)
|
| 20 |
|
| 21 |
|
| 22 |
def build_image_config(
|
|
|
|
| 25 |
resolution: str,
|
| 26 |
) -> types.ImageConfig:
|
| 27 |
kwargs: dict = {"aspect_ratio": aspect_ratio}
|
| 28 |
+
if model in IMAGE_MODELS_WITH_SIZE:
|
| 29 |
kwargs["image_size"] = resolution
|
| 30 |
return types.ImageConfig(**kwargs)
|
| 31 |
|
|
|
|
| 53 |
"response_modalities": ["IMAGE"],
|
| 54 |
"image_config": image_config,
|
| 55 |
}
|
| 56 |
+
# gemini-2.5-flash-image does not support thinking (API capability table).
|
| 57 |
if thinking_level:
|
| 58 |
if model == "gemini-3.1-flash-image-preview":
|
| 59 |
config_kwargs["thinking_config"] = types.ThinkingConfig(
|
web/backend/static/assets/index-l1IkTo6p.js
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
web/backend/static/index.html
CHANGED
|
@@ -53,7 +53,7 @@
|
|
| 53 |
}
|
| 54 |
})();
|
| 55 |
</script>
|
| 56 |
-
<script type="module" crossorigin src="/assets/index-
|
| 57 |
<link rel="stylesheet" crossorigin href="/assets/index-JnUJDL9j.css">
|
| 58 |
</head>
|
| 59 |
<body>
|
|
|
|
| 53 |
}
|
| 54 |
})();
|
| 55 |
</script>
|
| 56 |
+
<script type="module" crossorigin src="/assets/index-l1IkTo6p.js"></script>
|
| 57 |
<link rel="stylesheet" crossorigin href="/assets/index-JnUJDL9j.css">
|
| 58 |
</head>
|
| 59 |
<body>
|
web/config/generation_options.json
CHANGED
|
@@ -1,6 +1,7 @@
|
|
| 1 |
{
|
| 2 |
"image": {
|
| 3 |
"models": [
|
|
|
|
| 4 |
{ "value": "gemini-3.1-flash-image-preview", "label": "Nano Banana 2" },
|
| 5 |
{ "value": "gemini-3-pro-image-preview", "label": "Nano Banana Pro" }
|
| 6 |
],
|
|
|
|
| 1 |
{
|
| 2 |
"image": {
|
| 3 |
"models": [
|
| 4 |
+
{ "value": "gemini-2.5-flash-image", "label": "Nano Banana" },
|
| 5 |
{ "value": "gemini-3.1-flash-image-preview", "label": "Nano Banana 2" },
|
| 6 |
{ "value": "gemini-3-pro-image-preview", "label": "Nano Banana Pro" }
|
| 7 |
],
|
web/frontend/src/pages/ImageGen.tsx
CHANGED
|
@@ -5,13 +5,17 @@ import { ElapsedTimer } from "../components/ElapsedTimer";
|
|
| 5 |
import { ImageSlot } from "../components/ImageSlot";
|
| 6 |
import { useGenerationOptions } from "../context/GenerationOptionsContext";
|
| 7 |
|
|
|
|
| 8 |
const FLASH_ID = "gemini-3.1-flash-image-preview";
|
| 9 |
const PRO_ID = "gemini-3-pro-image-preview";
|
| 10 |
|
| 11 |
-
|
|
|
|
| 12 |
|
| 13 |
export function ImageGen() {
|
| 14 |
const opts = useGenerationOptions();
|
|
|
|
|
|
|
| 15 |
const flashLabel =
|
| 16 |
opts.image.models.find((m) => m.value === FLASH_ID)?.label ?? "Nano Banana 2";
|
| 17 |
const proLabel =
|
|
@@ -31,11 +35,12 @@ export function ImageGen() {
|
|
| 31 |
|
| 32 |
const variantOptions = useMemo(
|
| 33 |
() => [
|
|
|
|
| 34 |
{ id: "flash-minimal" as const, label: flashLabel },
|
| 35 |
{ id: "flash-high" as const, label: `${flashLabel}(长思考)` },
|
| 36 |
{ id: "pro-high" as const, label: `${proLabel}(长思考)` },
|
| 37 |
],
|
| 38 |
-
[flashLabel, proLabel],
|
| 39 |
);
|
| 40 |
|
| 41 |
async function onSubmit(e: FormEvent) {
|
|
@@ -49,6 +54,10 @@ export function ImageGen() {
|
|
| 49 |
let model: string;
|
| 50 |
let thinking: "minimal" | "high";
|
| 51 |
switch (variant) {
|
|
|
|
|
|
|
|
|
|
|
|
|
| 52 |
case "flash-minimal":
|
| 53 |
model = FLASH_ID;
|
| 54 |
thinking = "minimal";
|
|
@@ -101,7 +110,7 @@ export function ImageGen() {
|
|
| 101 |
填写提示词;参考图最多三张。再选比例与分辨率。
|
| 102 |
</p>
|
| 103 |
<p className="text-xs text-mist leading-relaxed">
|
| 104 |
-
{flashLabel} 与 {proLabel} 为实验性,接口或效果可能变更。
|
| 105 |
</p>
|
| 106 |
</div>
|
| 107 |
|
|
@@ -124,7 +133,7 @@ export function ImageGen() {
|
|
| 124 |
))}
|
| 125 |
</select>
|
| 126 |
<p className="text-xs text-mist mt-1.5">
|
| 127 |
-
{flashLabel} 为默认
|
| 128 |
</p>
|
| 129 |
</div>
|
| 130 |
|
|
|
|
| 5 |
import { ImageSlot } from "../components/ImageSlot";
|
| 6 |
import { useGenerationOptions } from "../context/GenerationOptionsContext";
|
| 7 |
|
| 8 |
+
const FLASH25_ID = "gemini-2.5-flash-image";
|
| 9 |
const FLASH_ID = "gemini-3.1-flash-image-preview";
|
| 10 |
const PRO_ID = "gemini-3-pro-image-preview";
|
| 11 |
|
| 12 |
+
/** Order: older flash → Nano Banana 2 (default) → long-thinking → Pro */
|
| 13 |
+
type ImageGenVariant = "nb25-minimal" | "flash-minimal" | "flash-high" | "pro-high";
|
| 14 |
|
| 15 |
export function ImageGen() {
|
| 16 |
const opts = useGenerationOptions();
|
| 17 |
+
const nano25Label =
|
| 18 |
+
opts.image.models.find((m) => m.value === FLASH25_ID)?.label ?? "Nano Banana";
|
| 19 |
const flashLabel =
|
| 20 |
opts.image.models.find((m) => m.value === FLASH_ID)?.label ?? "Nano Banana 2";
|
| 21 |
const proLabel =
|
|
|
|
| 35 |
|
| 36 |
const variantOptions = useMemo(
|
| 37 |
() => [
|
| 38 |
+
{ id: "nb25-minimal" as const, label: nano25Label },
|
| 39 |
{ id: "flash-minimal" as const, label: flashLabel },
|
| 40 |
{ id: "flash-high" as const, label: `${flashLabel}(长思考)` },
|
| 41 |
{ id: "pro-high" as const, label: `${proLabel}(长思考)` },
|
| 42 |
],
|
| 43 |
+
[nano25Label, flashLabel, proLabel],
|
| 44 |
);
|
| 45 |
|
| 46 |
async function onSubmit(e: FormEvent) {
|
|
|
|
| 54 |
let model: string;
|
| 55 |
let thinking: "minimal" | "high";
|
| 56 |
switch (variant) {
|
| 57 |
+
case "nb25-minimal":
|
| 58 |
+
model = FLASH25_ID;
|
| 59 |
+
thinking = "minimal";
|
| 60 |
+
break;
|
| 61 |
case "flash-minimal":
|
| 62 |
model = FLASH_ID;
|
| 63 |
thinking = "minimal";
|
|
|
|
| 110 |
填写提示词;参考图最多三张。再选比例与分辨率。
|
| 111 |
</p>
|
| 112 |
<p className="text-xs text-mist leading-relaxed">
|
| 113 |
+
{nano25Label} 为稳定版;{flashLabel} 与 {proLabel} 为实验性,接口或效果可能变更。
|
| 114 |
</p>
|
| 115 |
</div>
|
| 116 |
|
|
|
|
| 133 |
))}
|
| 134 |
</select>
|
| 135 |
<p className="text-xs text-mist mt-1.5">
|
| 136 |
+
{nano25Label} 为上一代快速;{flashLabel} 为默认推荐;{flashLabel}(长思考)与 {proLabel}(长思考)为长思考模式,更慢、更细;{proLabel} 为 Pro 路线。
|
| 137 |
</p>
|
| 138 |
</div>
|
| 139 |
|
web/frontend/src/pages/ToolsUpscale.tsx
CHANGED
|
@@ -158,7 +158,7 @@ export function ToolsUpscale() {
|
|
| 158 |
原图必选。选图后宽高比自动匹配(可改);分辨率默认 4K。比例差太远时会提示。
|
| 159 |
</p>
|
| 160 |
<p className="text-xs text-mist leading-relaxed">
|
| 161 |
-
下拉中的名称为配置所示;
|
| 162 |
</p>
|
| 163 |
</div>
|
| 164 |
|
|
|
|
| 158 |
原图必选。选图后宽高比自动匹配(可改);分辨率默认 4K。比例差太远时会提示。
|
| 159 |
</p>
|
| 160 |
<p className="text-xs text-mist leading-relaxed">
|
| 161 |
+
下拉中的名称为配置所示;Nano Banana 为稳定版;Nano Banana 2 与 Nano Banana Pro 为实验性,接口或效果可能变更。
|
| 162 |
</p>
|
| 163 |
</div>
|
| 164 |
|