Gemini-VideoGeneration / gen_video_prompt_only.py
LehongWu's picture
Upload folder using huggingface_hub
6249f41 verified
#!/usr/bin/env python3
import argparse
import json
import os
import subprocess
import sys
import tempfile
import time
from pathlib import Path
from google import genai
from google.genai import types
def strip_audio(video_path: Path) -> None:
    """Remove the audio track from ``video_path`` in place using ffmpeg.

    The video stream is copied unchanged (``-c:v copy``, no re-encode) and
    ``-an`` drops the audio. ffmpeg writes to a temporary file that replaces
    the original only after ffmpeg exits successfully, so a failed run leaves
    the original file as it was.

    Args:
        video_path: Path of the video file to rewrite without audio.

    Raises:
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
        FileNotFoundError: If the ``ffmpeg`` executable cannot be found.
    """
    # Create the scratch file next to the target instead of in the system
    # temp directory: Path.replace() is a rename, which fails with an
    # "Invalid cross-device link" OSError when the two paths live on
    # different filesystems (e.g. /tmp on tmpfs, output on a data disk).
    with tempfile.NamedTemporaryFile(
        suffix=".mp4", dir=video_path.parent, delete=False
    ) as f:
        temp_path = Path(f.name)
    try:
        subprocess.run(
            [
                "ffmpeg",
                "-y",  # overwrite the (empty) temp file without prompting
                "-i",
                str(video_path),
                "-an",
                "-c:v",
                "copy",
                str(temp_path),
            ],
            check=True,
            capture_output=True,
        )
        temp_path.replace(video_path)
    finally:
        # After a successful replace the temp name is already gone; on any
        # failure this removes the partial output.
        temp_path.unlink(missing_ok=True)
def _positive_int(value: str) -> int:
    """Convert an argparse string to an int, rejecting values below 1."""
    try:
        number = int(value)
    except ValueError:
        raise argparse.ArgumentTypeError(f"invalid int value: {value!r}") from None
    if number < 1:
        raise argparse.ArgumentTypeError(
            f"must be a positive integer, got {number}"
        )
    return number


def parse_args() -> argparse.Namespace:
    """Parse the command-line options for the video generation script.

    Multi-word options accept both the dashed and the underscored spelling
    (``--output-dir`` / ``--output_dir``). The integer options ``--duration``,
    ``--number-of-videos`` and ``--poll-seconds`` must be >= 1; invalid values
    are rejected here, before any request is sent.

    Returns:
        Namespace with ``prompt``, ``model``, ``name``, ``output_dir``,
        ``resolution``, ``duration``, ``aspect_ratio``, ``number_of_videos``
        and ``poll_seconds``.
    """
    parser = argparse.ArgumentParser(
        description="Generate a video from a text prompt using Gemini (Veo)."
    )
    parser.add_argument("--prompt", required=True, help="Prompt describing the video.")
    parser.add_argument(
        "--model",
        default="veo-3.0-fast-generate-001",
        help="Video generation model name.",
    )
    parser.add_argument("--name", default="generated_video", help="Base output filename.")
    parser.add_argument(
        "--output-dir",
        "--output_dir",
        dest="output_dir",
        default="output_dir",
        help="Directory to save outputs (default: output_dir).",
    )
    parser.add_argument("--resolution", default="1080p", help="e.g. 720p, 1080p, 4k")
    parser.add_argument(
        "--duration",
        type=_positive_int,
        default=8,
        help="Video length in seconds.",
    )
    parser.add_argument(
        "--aspect-ratio",
        "--aspect_ratio",
        dest="aspect_ratio",
        default="16:9",
        help="Aspect ratio (e.g. 16:9, 9:16, 1:1).",
    )
    parser.add_argument(
        "--number-of-videos",
        "--number_of_videos",
        dest="number_of_videos",
        type=_positive_int,
        default=1,
        help="How many videos to generate.",
    )
    parser.add_argument(
        "--poll-seconds",
        "--poll_seconds",
        dest="poll_seconds",
        # A zero interval would busy-loop the polling call and a negative one
        # makes time.sleep() raise, so require at least one second.
        type=_positive_int,
        default=10,
        help="Polling interval while generation is running.",
    )
    return parser.parse_args()
def main() -> int:
    """Generate video(s) from a text prompt and save them with a metadata file.

    Steps: parse the CLI arguments, start a video generation operation, poll
    it until it reports done, then download each returned video, strip its
    audio track and write it to the output directory. A JSON file recording
    the prompt, model, configuration and saved paths is written alongside.

    A single result is saved as ``<name>.mp4``; several results are saved as
    ``<name>_1.mp4``, ``<name>_2.mp4``, ...

    Returns:
        Process exit code: 0 on success, 1 if ``GEMINI_API_KEY`` is not set,
        2 if the finished operation contains no videos, 3 if the finished
        operation reports an error.
    """
    args = parse_args()
    if not os.getenv("GEMINI_API_KEY"):
        print("Missing GEMINI_API_KEY environment variable.", file=sys.stderr)
        return 1

    client = genai.Client()
    config = types.GenerateVideosConfig(
        resolution=args.resolution,
        duration_seconds=args.duration,
        aspect_ratio=args.aspect_ratio,
        number_of_videos=args.number_of_videos,
    )
    operation = client.models.generate_videos(
        model=args.model,
        prompt=args.prompt,
        config=config,
    )

    # monotonic() is immune to wall-clock adjustments while we wait.
    started_at = time.monotonic()
    while not operation.done:
        elapsed_seconds = int(time.monotonic() - started_at)
        print(f"Waiting for video generation... elapsed: {elapsed_seconds}s")
        time.sleep(args.poll_seconds)
        operation = client.operations.get(operation)

    # A finished operation may carry an error instead of a response; report
    # it rather than crashing on a missing response below.
    error = getattr(operation, "error", None)
    if error:
        print(f"Video generation failed: {error}", file=sys.stderr)
        return 3

    response = operation.response
    generated = response.generated_videos if response is not None else None
    if not generated:
        print("No videos returned by API.", file=sys.stderr)
        return 2

    out_dir = Path(args.output_dir)
    out_dir.mkdir(parents=True, exist_ok=True)
    base_name = args.name

    # Only add a numeric suffix when there is more than one video.
    multiple = len(generated) > 1
    saved_files = []
    for idx, item in enumerate(generated, start=1):
        filename = f"{base_name}_{idx}.mp4" if multiple else f"{base_name}.mp4"
        out_path = out_dir / filename
        video_obj = item.video
        # Fetch the video through the client before saving it to disk.
        client.files.download(file=video_obj)
        video_obj.save(str(out_path))
        strip_audio(out_path)
        saved_files.append(str(out_path.resolve()))
        print(f"Saved video: {out_path.resolve()}")

    metadata_path = out_dir / f"{base_name}.json"
    metadata = {
        "prompt": args.prompt,
        "model": args.model,
        "config": {
            "resolution": args.resolution,
            "duration_seconds": args.duration,
            "aspect_ratio": args.aspect_ratio,
            "number_of_videos": args.number_of_videos,
            "poll_seconds": args.poll_seconds,
        },
        "saved_videos": saved_files,
    }
    metadata_path.write_text(json.dumps(metadata, indent=2), encoding="utf-8")
    print(f"Saved metadata: {metadata_path.resolve()}")
    return 0
# Script entry point: run main() and use its return value as the exit status.
if __name__ == "__main__":
    sys.exit(main())