LehongWu committed on
Commit
38b7ac0
·
verified ·
1 Parent(s): 7d2ad47

Upload folder using huggingface_hub

Browse files
docs/SPEC_WEB_UI.md CHANGED
@@ -10,9 +10,9 @@ Image and video generation from prompts and optional reference images.
10
 
11
  **A. 图片** — 0–3 张参考图 + 提示词 → 一张图。模型含 Nano Banana(`gemini-2.5-flash-image`,无长思考)、Nano Banana 2(`gemini-3.1-flash-image-preview`,可选长思考)、Nano Banana Pro(`gemini-3-pro-image-preview`,长思考);默认选项为 Nano Banana 2 快速。宽高比、分辨率见 `generation_options.json`。
12
 
13
- **B. 视频** — 0–3 张参考图 + 提示词 → 短视频。Veo 模型可配置;**Veo 3.1 Lite** 在配置中设 **`supports_reference_images: false`**。有参考图时时长 **8s**;仅文案时 4/6/8s(以 API 为准)。
14
 
15
- **C. 首尾过渡** — 起始帧必填 + 提示词;结尾帧可选或「与起始相同」。时长 **固定 8s**。
16
 
17
  ## 辅助工具
18
 
 
10
 
11
  **A. 图片** — 0–3 张参考图 + 提示词 → 一张图。模型含 Nano Banana(`gemini-2.5-flash-image`,无长思考)、Nano Banana 2(`gemini-3.1-flash-image-preview`,可选长思考)、Nano Banana Pro(`gemini-3-pro-image-preview`,长思考);默认选项为 Nano Banana 2 快速。宽高比、分辨率见 `generation_options.json`。
12
 
13
+ **B. 视频** — 0–3 张参考图 + 提示词 → 短视频。Veo 模型可配置;**Veo 3 / Veo 3 Fast** 与 **Veo 3.1 Lite** 在配置中设 **`supports_reference_images: false`**(前两者仅纯提示词);默认模型 **Veo 3.1 Fast**。有参考图时时长 **8s**;仅文案时 4/6/8s(以 API 为准)。
14
 
15
+ **C. 首尾过渡** — 起始帧必填 + 提示词;时长 **固定 8s**。**Veo 3 / Veo 3 Fast**(`supports_end_frame: false`)仅起始;**Veo 3.1** 可选独立结尾帧或「与起始相同」。默认模型 **Veo 3.1 Fast**。
16
 
17
  ## 辅助工具
18
 
docs/WEB_DEV_GUIDE.md CHANGED
@@ -40,7 +40,7 @@ export SESSION_SECRET="$(openssl rand -hex 32)"
40
  Edit **`web/config/generation_options.json`** (or path from `GENERATION_OPTIONS_PATH`). UI loads it via **`GET /api/config/generation-options`** after login. JSON-only changes need no frontend rebuild.
41
 
42
  - **`image`**: `models`, `aspect_ratios`, `resolutions`.
43
- - **`video`** / **`video_frames`**: `models`, `aspect_ratios`, `resolutions`, `durations_seconds`. On **`video`**, **`supports_reference_images: false`** hides reference images (e.g. Veo 3.1 Lite). With reference images, duration is **8s**. **首尾帧** route is always **8s**.
44
 
45
  ## Build frontend
46
 
 
40
  Edit **`web/config/generation_options.json`** (or path from `GENERATION_OPTIONS_PATH`). UI loads it via **`GET /api/config/generation-options`** after login. JSON-only changes need no frontend rebuild.
41
 
42
  - **`image`**: `models`, `aspect_ratios`, `resolutions`.
43
+ - **`video`** / **`video_frames`**: `models`, `aspect_ratios`, `resolutions`, `durations_seconds`. On **`video`**, **`supports_reference_images: false`** disables reference images for that model (e.g. Veo 3 / Veo 3 Fast — prompt-only; Veo 3.1 Lite — also no reference images). With reference images, duration is **8s**. **首尾帧** route is always **8s**. On **`video_frames`**, **`supports_end_frame: false`** (e.g. Veo 3 / Veo 3 Fast) means only the start frame is used; end-frame UI and last-frame conditioning are omitted.
44
 
45
  ## Build frontend
46
 
web/backend/routes/generate.py CHANGED
@@ -16,6 +16,7 @@ from web.backend.deps import SessionUser
16
  from web.backend.services import gemini_image, gemini_video
17
  from web.backend.services.generation_options import (
18
  video_model_supports_4k,
 
19
  video_model_supports_reference_images,
20
  )
21
 
@@ -217,6 +218,10 @@ async def generate_video_frames(
217
  same_as_start = _parse_form_bool(end_same_as_start)
218
  if same_as_start:
219
  end_raw = None
 
 
 
 
220
 
221
  # First/last frame conditioning: API requires 8s duration (same family of rules as reference images).
222
  dur = 8
 
16
  from web.backend.services import gemini_image, gemini_video
17
  from web.backend.services.generation_options import (
18
  video_model_supports_4k,
19
+ video_model_supports_end_frame,
20
  video_model_supports_reference_images,
21
  )
22
 
 
218
  same_as_start = _parse_form_bool(end_same_as_start)
219
  if same_as_start:
220
  end_raw = None
221
+ if not video_model_supports_end_frame(m):
222
+ # Veo 3 / Veo 3 Fast: start frame only (no last-frame conditioning).
223
+ end_raw = None
224
+ same_as_start = False
225
 
226
  # First/last frame conditioning: API requires 8s duration (same family of rules as reference images).
227
  dur = 8
web/backend/services/generation_options.py CHANGED
@@ -53,3 +53,15 @@ def video_model_supports_4k(model: str) -> bool:
53
  if isinstance(m, dict) and m.get("value") == model:
54
  return bool(m.get("supports_4k", True))
55
  return True
 
 
 
 
 
 
 
 
 
 
 
 
 
53
  if isinstance(m, dict) and m.get("value") == model:
54
  return bool(m.get("supports_4k", True))
55
  return True
56
+
57
+
58
+ def video_model_supports_end_frame(model: str) -> bool:
59
+ """Read `video_frames.models[].supports_end_frame` (default True if key absent)."""
60
+ try:
61
+ data = load_generation_options()
62
+ except (OSError, ValueError, json.JSONDecodeError):
63
+ return True
64
+ for m in data.get("video_frames", {}).get("models", []):
65
+ if isinstance(m, dict) and m.get("value") == model:
66
+ return bool(m.get("supports_end_frame", True))
67
+ return True
web/backend/static/assets/index-J-30f0lA.js ADDED
The diff for this file is too large to render. See raw diff
 
web/backend/static/index.html CHANGED
@@ -53,7 +53,7 @@
53
  }
54
  })();
55
  </script>
56
- <script type="module" crossorigin src="/assets/index-l1IkTo6p.js"></script>
57
  <link rel="stylesheet" crossorigin href="/assets/index-JnUJDL9j.css">
58
  </head>
59
  <body>
 
53
  }
54
  })();
55
  </script>
56
+ <script type="module" crossorigin src="/assets/index-J-30f0lA.js"></script>
57
  <link rel="stylesheet" crossorigin href="/assets/index-JnUJDL9j.css">
58
  </head>
59
  <body>
web/config/generation_options.json CHANGED
@@ -21,8 +21,8 @@
21
  },
22
  "video": {
23
  "models": [
24
- { "value": "veo-3.0-fast-generate-001", "label": "Veo 3 Fast(快速)", "supports_reference_images": true },
25
- { "value": "veo-3.0-generate-001", "label": "Veo 3(标准)", "supports_reference_images": true },
26
  { "value": "veo-3.1-lite-generate-preview", "label": "Veo 3.1 Lite(轻量)", "supports_reference_images": false, "supports_4k": false },
27
  { "value": "veo-3.1-fast-generate-preview", "label": "Veo 3.1 Fast(快速)", "supports_reference_images": true },
28
  { "value": "veo-3.1-generate-preview", "label": "Veo 3.1(标准)", "supports_reference_images": true }
@@ -33,8 +33,8 @@
33
  },
34
  "video_frames": {
35
  "models": [
36
- { "value": "veo-3.0-fast-generate-001", "label": "Veo 3 Fast(快速)" },
37
- { "value": "veo-3.0-generate-001", "label": "Veo 3(标准)" },
38
  { "value": "veo-3.1-lite-generate-preview", "label": "Veo 3.1 Lite(轻量)", "supports_4k": false },
39
  { "value": "veo-3.1-fast-generate-preview", "label": "Veo 3.1 Fast(快速)" },
40
  { "value": "veo-3.1-generate-preview", "label": "Veo 3.1(标准)" }
 
21
  },
22
  "video": {
23
  "models": [
24
+ { "value": "veo-3.0-fast-generate-001", "label": "Veo 3 Fast(快速)", "supports_reference_images": false },
25
+ { "value": "veo-3.0-generate-001", "label": "Veo 3(标准)", "supports_reference_images": false },
26
  { "value": "veo-3.1-lite-generate-preview", "label": "Veo 3.1 Lite(轻量)", "supports_reference_images": false, "supports_4k": false },
27
  { "value": "veo-3.1-fast-generate-preview", "label": "Veo 3.1 Fast(快速)", "supports_reference_images": true },
28
  { "value": "veo-3.1-generate-preview", "label": "Veo 3.1(标准)", "supports_reference_images": true }
 
33
  },
34
  "video_frames": {
35
  "models": [
36
+ { "value": "veo-3.0-fast-generate-001", "label": "Veo 3 Fast(快速)", "supports_end_frame": false },
37
+ { "value": "veo-3.0-generate-001", "label": "Veo 3(标准)", "supports_end_frame": false },
38
  { "value": "veo-3.1-lite-generate-preview", "label": "Veo 3.1 Lite(轻量)", "supports_4k": false },
39
  { "value": "veo-3.1-fast-generate-preview", "label": "Veo 3.1 Fast(快速)" },
40
  { "value": "veo-3.1-generate-preview", "label": "Veo 3.1(标准)" }
web/frontend/src/context/GenerationOptionsContext.tsx CHANGED
@@ -11,12 +11,14 @@ import { apiFetch } from "../api";
11
  export type LabeledModel = { value: string; label: string };
12
  export type ThinkingLevelOption = { value: string; label: string };
13
 
14
- /** Video model entry; `supports_reference_images` / `supports_4k` default to true if omitted. */
15
  export type VideoModelOption = {
16
  value: string;
17
  label: string;
18
  supports_reference_images?: boolean;
19
  supports_4k?: boolean;
 
 
20
  };
21
 
22
  export type GenerationOptions = {
@@ -135,6 +137,11 @@ export function modelSupports4k(models: VideoModelOption[], value: string): bool
135
  return m?.supports_4k !== false;
136
  }
137
 
 
 
 
 
 
138
  export function useGenerationOptions(): GenerationOptions {
139
  const ctx = useContext(GenerationOptionsContext);
140
  if (!ctx) {
 
11
  export type LabeledModel = { value: string; label: string };
12
  export type ThinkingLevelOption = { value: string; label: string };
13
 
14
+ /** Video model entry; `supports_reference_images` / `supports_4k` / `supports_end_frame` default to true if omitted. */
15
  export type VideoModelOption = {
16
  value: string;
17
  label: string;
18
  supports_reference_images?: boolean;
19
  supports_4k?: boolean;
20
+ /** Start/end mode: if false, only start frame (e.g. Veo 3 family). */
21
+ supports_end_frame?: boolean;
22
  };
23
 
24
  export type GenerationOptions = {
 
137
  return m?.supports_4k !== false;
138
  }
139
 
140
+ export function modelSupportsEndFrame(models: VideoModelOption[], value: string): boolean {
141
+ const m = models.find((x) => x.value === value);
142
+ return m?.supports_end_frame !== false;
143
+ }
144
+
145
  export function useGenerationOptions(): GenerationOptions {
146
  const ctx = useContext(GenerationOptionsContext);
147
  if (!ctx) {
web/frontend/src/pages/VideoFrames.tsx CHANGED
@@ -4,21 +4,24 @@ import { DownloadMediaButton } from "../components/DownloadMediaButton";
4
  import { ElapsedTimer } from "../components/ElapsedTimer";
5
  import { ImageSlot } from "../components/ImageSlot";
6
  import {
7
- defaultFastModelValue,
8
  modelSupports4k,
 
9
  useGenerationOptions,
10
  } from "../context/GenerationOptionsContext";
11
 
 
 
12
  export function VideoFrames() {
13
  const opts = useGenerationOptions();
14
  const vf = opts.video_frames;
15
  const [prompt, setPrompt] = useState("");
16
- const [model, setModel] = useState(() =>
17
- defaultFastModelValue(vf.models, "veo-3.0-fast-generate-001"),
18
- );
 
19
  const [aspect, setAspect] = useState(vf.aspect_ratios[0] ?? "16:9");
20
  const [resolution, setResolution] = useState(
21
- vf.resolutions[1] ?? vf.resolutions[0] ?? "1080p",
22
  );
23
  const [start, setStart] = useState<File | null>(null);
24
  const [end, setEnd] = useState<File | null>(null);
@@ -29,6 +32,7 @@ export function VideoFrames() {
29
  const formRef = useRef<HTMLFormElement>(null);
30
 
31
  const supports4k = modelSupports4k(vf.models, model);
 
32
 
33
  useEffect(() => {
34
  if (endSameAsStart) {
@@ -36,6 +40,13 @@ export function VideoFrames() {
36
  }
37
  }, [endSameAsStart]);
38
 
 
 
 
 
 
 
 
39
  useEffect(() => {
40
  if (!supports4k && resolution === "4k") {
41
  setResolution("1080p");
@@ -61,9 +72,13 @@ export function VideoFrames() {
61
  fd.append("aspect_ratio", aspect);
62
  fd.append("duration_seconds", "8");
63
  fd.append("start_frame", start);
64
- fd.append("end_same_as_start", endSameAsStart ? "true" : "false");
65
- if (!endSameAsStart && end) {
66
- fd.append("end_frame", end);
 
 
 
 
67
  }
68
 
69
  setBusy(true);
@@ -91,7 +106,10 @@ export function VideoFrames() {
91
  <h1 className="font-display text-2xl font-semibold text-ink mb-2">视频生成(首尾过渡)</h1>
92
  <div className="mb-6 space-y-1.5">
93
  <p className="text-mist text-sm leading-relaxed">
94
- 起始帧(必选);填写提示词(必填)。结尾帧(可选),或与起始帧相同。时长 8 秒。
 
 
 
95
  </p>
96
  <p className="text-xs text-mist leading-relaxed">
97
  Veo 3 与 Veo 3 Fast 为稳定版;Veo 3.1 系列为实验性,接口或效果可能变更。
@@ -175,20 +193,28 @@ export function VideoFrames() {
175
  <p className="text-sm font-semibold text-ink mb-2">帧</p>
176
  <div className="space-y-3">
177
  <ImageSlot label="起始帧" file={start} onChange={setStart} />
178
- <label className="flex items-center gap-2 cursor-pointer text-sm text-ink select-none">
179
- <input
180
- type="checkbox"
181
- className="rounded border-slate-300 text-ink focus:ring-clay/50"
182
- checked={endSameAsStart}
183
- onChange={(e) => setEndSameAsStart(e.target.checked)}
184
- />
185
- 结尾帧与起始帧相同
186
- </label>
187
- {!endSameAsStart && (
188
- <ImageSlot label="结尾帧" file={end} onChange={setEnd} />
189
- )}
190
- {endSameAsStart && (
191
- <p className="text-xs text-mist pl-0.5">结尾帧与起始帧一张图。</p>
 
 
 
 
 
 
 
 
192
  )}
193
  </div>
194
  </div>
 
4
  import { ElapsedTimer } from "../components/ElapsedTimer";
5
  import { ImageSlot } from "../components/ImageSlot";
6
  import {
 
7
  modelSupports4k,
8
+ modelSupportsEndFrame,
9
  useGenerationOptions,
10
  } from "../context/GenerationOptionsContext";
11
 
12
+ const DEFAULT_VIDEO_FRAMES_MODEL = "veo-3.1-fast-generate-preview";
13
+
14
  export function VideoFrames() {
15
  const opts = useGenerationOptions();
16
  const vf = opts.video_frames;
17
  const [prompt, setPrompt] = useState("");
18
+ const [model, setModel] = useState(() => {
19
+ const preferred = vf.models.find((m) => m.value === DEFAULT_VIDEO_FRAMES_MODEL);
20
+ return preferred?.value ?? vf.models[0]?.value ?? DEFAULT_VIDEO_FRAMES_MODEL;
21
+ });
22
  const [aspect, setAspect] = useState(vf.aspect_ratios[0] ?? "16:9");
23
  const [resolution, setResolution] = useState(
24
+ vf.resolutions[0] ?? "720p",
25
  );
26
  const [start, setStart] = useState<File | null>(null);
27
  const [end, setEnd] = useState<File | null>(null);
 
32
  const formRef = useRef<HTMLFormElement>(null);
33
 
34
  const supports4k = modelSupports4k(vf.models, model);
35
+ const supportsEndFrame = modelSupportsEndFrame(vf.models, model);
36
 
37
  useEffect(() => {
38
  if (endSameAsStart) {
 
40
  }
41
  }, [endSameAsStart]);
42
 
43
+ useEffect(() => {
44
+ if (!supportsEndFrame) {
45
+ setEndSameAsStart(false);
46
+ setEnd(null);
47
+ }
48
+ }, [supportsEndFrame]);
49
+
50
  useEffect(() => {
51
  if (!supports4k && resolution === "4k") {
52
  setResolution("1080p");
 
72
  fd.append("aspect_ratio", aspect);
73
  fd.append("duration_seconds", "8");
74
  fd.append("start_frame", start);
75
+ if (supportsEndFrame) {
76
+ fd.append("end_same_as_start", endSameAsStart ? "true" : "false");
77
+ if (!endSameAsStart && end) {
78
+ fd.append("end_frame", end);
79
+ }
80
+ } else {
81
+ fd.append("end_same_as_start", "false");
82
  }
83
 
84
  setBusy(true);
 
106
  <h1 className="font-display text-2xl font-semibold text-ink mb-2">视频生成(首尾过渡)</h1>
107
  <div className="mb-6 space-y-1.5">
108
  <p className="text-mist text-sm leading-relaxed">
109
+ 起始帧(必选);填写提示词(必填)。时长 8 秒。
110
+ <span className="block mt-1">
111
+ Veo 3 与 Veo 3 Fast 仅支持起始帧,不提供结尾帧选项。Veo 3.1 系列可选用独立结尾帧,或与起始帧相同。
112
+ </span>
113
  </p>
114
  <p className="text-xs text-mist leading-relaxed">
115
  Veo 3 与 Veo 3 Fast 为稳定版;Veo 3.1 系列为实验性,接口或效果可能变更。
 
193
  <p className="text-sm font-semibold text-ink mb-2">帧</p>
194
  <div className="space-y-3">
195
  <ImageSlot label="起始帧" file={start} onChange={setStart} />
196
+ {!supportsEndFrame ? (
197
+ <p className="text-xs text-mist pl-0.5">
198
+ 当前模型(Veo 3 / Veo 3 Fast)仅使用起始帧,不设置结尾帧。
199
+ </p>
200
+ ) : (
201
+ <>
202
+ <label className="flex items-center gap-2 cursor-pointer text-sm text-ink select-none">
203
+ <input
204
+ type="checkbox"
205
+ className="rounded border-slate-300 text-ink focus:ring-clay/50"
206
+ checked={endSameAsStart}
207
+ onChange={(e) => setEndSameAsStart(e.target.checked)}
208
+ />
209
+ 结尾帧与起始帧相同
210
+ </label>
211
+ {!endSameAsStart && (
212
+ <ImageSlot label="结尾帧" file={end} onChange={setEnd} />
213
+ )}
214
+ {endSameAsStart && (
215
+ <p className="text-xs text-mist pl-0.5">结尾帧与起始帧为同一张图。</p>
216
+ )}
217
+ </>
218
  )}
219
  </div>
220
  </div>
web/frontend/src/pages/VideoGen.tsx CHANGED
@@ -10,18 +10,21 @@ import {
10
  useGenerationOptions,
11
  } from "../context/GenerationOptionsContext";
12
 
 
 
13
  export function VideoGen() {
14
  const opts = useGenerationOptions();
15
  const durs = opts.video.durations_seconds;
16
  const defaultDur = durs.includes(4) ? 4 : durs[0] ?? 4;
17
 
18
  const [prompt, setPrompt] = useState("");
19
- const [model, setModel] = useState(() =>
20
- defaultFastModelValue(opts.video.models, "veo-3.0-fast-generate-001"),
21
- );
 
22
  const [aspect, setAspect] = useState(opts.video.aspect_ratios[0] ?? "16:9");
23
  const [resolution, setResolution] = useState(
24
- opts.video.resolutions[1] ?? opts.video.resolutions[0] ?? "1080p",
25
  );
26
  const [duration, setDuration] = useState(defaultDur);
27
  const [f0, setF0] = useState<File | null>(null);
@@ -59,7 +62,9 @@ export function VideoGen() {
59
  );
60
  const fallback = defaultFastModelValue(
61
  support,
62
- support[0]?.value ?? "veo-3.0-fast-generate-001",
 
 
63
  );
64
  if (fallback) setModel(fallback);
65
  }, [hasRefs, model, opts.video.models]);
@@ -112,7 +117,7 @@ export function VideoGen() {
112
  <div className="mb-6 space-y-1.5">
113
  <p className="text-mist text-sm leading-relaxed">
114
  填写提示词;参考图最多三张。有参考图、1080p 或 4K 时固定 8 秒;720p 仅文字时可选 4/6
115
- 秒。Veo 3.1 Lite 不支持参考图与 4K。
116
  </p>
117
  <p className="text-xs text-mist leading-relaxed">
118
  Veo 3 与 Veo 3 Fast 为稳定版;Veo 3.1 系列为实验性,接口或效果可能变更。
@@ -219,7 +224,8 @@ export function VideoGen() {
219
  参考图(最多三张)
220
  {hasRefs && (
221
  <span className="block text-xs font-normal text-mist mt-1">
222
- 有参考图时勿选 Veo 3.1 Lite;可选 Veo 3、Veo 3 FastVeo 3.1 Fast Veo 3.1(标准)。
 
223
  </span>
224
  )}
225
  </p>
 
10
  useGenerationOptions,
11
  } from "../context/GenerationOptionsContext";
12
 
13
+ const DEFAULT_VIDEO_MODEL = "veo-3.1-fast-generate-preview";
14
+
15
  export function VideoGen() {
16
  const opts = useGenerationOptions();
17
  const durs = opts.video.durations_seconds;
18
  const defaultDur = durs.includes(4) ? 4 : durs[0] ?? 4;
19
 
20
  const [prompt, setPrompt] = useState("");
21
+ const [model, setModel] = useState(() => {
22
+ const preferred = opts.video.models.find((m) => m.value === DEFAULT_VIDEO_MODEL);
23
+ return preferred?.value ?? defaultFastModelValue(opts.video.models, DEFAULT_VIDEO_MODEL);
24
+ });
25
  const [aspect, setAspect] = useState(opts.video.aspect_ratios[0] ?? "16:9");
26
  const [resolution, setResolution] = useState(
27
+ opts.video.resolutions[0] ?? "720p",
28
  );
29
  const [duration, setDuration] = useState(defaultDur);
30
  const [f0, setF0] = useState<File | null>(null);
 
62
  );
63
  const fallback = defaultFastModelValue(
64
  support,
65
+ support.find((m) => m.value === DEFAULT_VIDEO_MODEL)?.value ??
66
+ support[0]?.value ??
67
+ DEFAULT_VIDEO_MODEL,
68
  );
69
  if (fallback) setModel(fallback);
70
  }, [hasRefs, model, opts.video.models]);
 
117
  <div className="mb-6 space-y-1.5">
118
  <p className="text-mist text-sm leading-relaxed">
119
  填写提示词;参考图最多三张。有参考图、1080p 或 4K 时固定 8 秒;720p 仅文字时可选 4/6
120
+ 秒。Veo 3 与 Veo 3 Fast 仅支持纯提示词(无参考图);Veo 3.1 Lite 不支持参考图与 4K。
121
  </p>
122
  <p className="text-xs text-mist leading-relaxed">
123
  Veo 3 与 Veo 3 Fast 为稳定版;Veo 3.1 系列为实验性,接口或效果可能变更。
 
224
  参考图(最多三张)
225
  {hasRefs && (
226
  <span className="block text-xs font-normal text-mist mt-1">
227
+ 有参考图时勿选 Veo 3 / Veo 3 Fast(仅纯提示词)与 Veo 3.1 Lite;可选 Veo 3.1 Fast 或
228
+ Veo 3.1(标准)。
229
  </span>
230
  )}
231
  </p>