#!/usr/bin/env python3
"""Generate silent video clips from a text prompt with Gemini (Veo).

The script submits a generation request, polls the long-running operation
until it is done, downloads every returned clip, strips the audio track with
ffmpeg, and writes a JSON metadata file next to the clips.

Requires the ``GEMINI_API_KEY`` environment variable and an ``ffmpeg``
executable that ``subprocess`` can locate by name.
"""

import argparse
import json
import os
import subprocess
import sys
import tempfile
import time
from pathlib import Path

from google import genai
from google.genai import types


def strip_audio(video_path: Path) -> None:
    """Remove audio track from video using ffmpeg (video stream copied, no re-encode).

    ffmpeg writes to a scratch file which is then renamed over ``video_path``,
    so the original file is left untouched when ffmpeg fails.

    Args:
        video_path: Video file to rewrite in place.

    Raises:
        OSError: If ffmpeg cannot be started (e.g. it is not installed) or the
            scratch file cannot be created or renamed.
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status;
            its captured stdout/stderr are available on the exception.
    """
    # The scratch file lives beside the target: Path.replace() is a rename and
    # fails when source and destination are on different filesystems, which is
    # common when the system temp directory is a separate mount.
    with tempfile.NamedTemporaryFile(
        suffix=".mp4", delete=False, dir=str(video_path.parent)
    ) as handle:
        temp_path = Path(handle.name)

    try:
        subprocess.run(
            [
                "ffmpeg",
                "-y",
                "-i",
                str(video_path),
                "-an",
                "-c:v",
                "copy",
                str(temp_path),
            ],
            check=True,
            capture_output=True,
        )
        temp_path.replace(video_path)
    finally:
        # After a successful rename the scratch file no longer exists;
        # missing_ok avoids a check-then-delete race.
        temp_path.unlink(missing_ok=True)


def _positive_int(text: str) -> int:
    """argparse ``type`` callable: parse *text* as an integer that is >= 1."""
    try:
        value = int(text)
    except ValueError:
        raise argparse.ArgumentTypeError(
            f"invalid integer value: {text!r}"
        ) from None
    if value < 1:
        raise argparse.ArgumentTypeError(
            f"must be a positive integer, got {value}"
        )
    return value


def parse_args() -> argparse.Namespace:
    """Parse the command line.

    Returns:
        Namespace with ``prompt``, ``model``, ``name``, ``output_dir``,
        ``resolution``, ``duration``, ``aspect_ratio``, ``number_of_videos``
        and ``poll_seconds``.
    """
    parser = argparse.ArgumentParser(
        description="Generate a video from a text prompt using Gemini (Veo)."
    )
    parser.add_argument("--prompt", required=True, help="Prompt describing the video.")
    parser.add_argument(
        "--model",
        default="veo-3.0-fast-generate-001",
        help="Video generation model name.",
    )
    parser.add_argument("--name", default="generated_video", help="Base output filename.")
    parser.add_argument(
        "--output-dir",
        "--output_dir",
        dest="output_dir",
        default="output_dir",
        help="Directory to save outputs (default: output_dir).",
    )
    parser.add_argument("--resolution", default="1080p", help="e.g. 720p, 1080p, 4k")
    parser.add_argument(
        "--duration",
        type=_positive_int,
        default=8,
        help="Video length in seconds.",
    )
    parser.add_argument(
        "--aspect-ratio",
        default="16:9",
        help="Aspect ratio (e.g. 16:9, 9:16, 1:1).",
    )
    parser.add_argument(
        "--number-of-videos",
        type=_positive_int,
        default=1,
        help="How many videos to generate.",
    )
    # Must be positive: time.sleep() rejects negative values and a zero
    # interval would poll the API in a tight loop.
    parser.add_argument(
        "--poll-seconds",
        type=_positive_int,
        default=10,
        help="Polling interval while generation is running.",
    )
    return parser.parse_args()


def _output_path(out_dir: Path, base_name: str, index: int, total: int) -> Path:
    """Return the destination for clip *index* (1-based) out of *total* clips.

    A single clip is saved as ``<base_name>.mp4``; several clips are saved as
    ``<base_name>_<index>.mp4``.
    """
    if total == 1:
        return out_dir / f"{base_name}.mp4"
    return out_dir / f"{base_name}_{index}.mp4"


def main() -> int:
    """Generate, download and post-process the requested videos.

    Returns:
        Process exit status: 0 on success, 1 when ``GEMINI_API_KEY`` is not
        set, 2 when the operation reports an error or returns no videos, and
        3 when stripping the audio track fails.
    """
    args = parse_args()

    if not os.getenv("GEMINI_API_KEY"):
        print("Missing GEMINI_API_KEY environment variable.", file=sys.stderr)
        return 1

    client = genai.Client()
    config = types.GenerateVideosConfig(
        resolution=args.resolution,
        duration_seconds=args.duration,
        aspect_ratio=args.aspect_ratio,
        number_of_videos=args.number_of_videos,
    )
    operation = client.models.generate_videos(
        model=args.model,
        prompt=args.prompt,
        config=config,
    )

    # Monotonic clock: the elapsed figure must not jump if the wall clock is
    # adjusted while we wait.
    started_at = time.monotonic()
    while not operation.done:
        elapsed_seconds = int(time.monotonic() - started_at)
        print(f"Waiting for video generation... elapsed: {elapsed_seconds}s")
        time.sleep(args.poll_seconds)
        operation = client.operations.get(operation)

    # A finished operation may carry an error instead of a response; report it
    # rather than failing on a missing attribute below.
    error = getattr(operation, "error", None)
    if error:
        print(f"Video generation failed: {error}", file=sys.stderr)
        return 2

    response = operation.response
    generated = response.generated_videos if response is not None else None
    if not generated:
        print("No videos returned by API.", file=sys.stderr)
        return 2

    out_dir = Path(args.output_dir)
    out_dir.mkdir(parents=True, exist_ok=True)
    base_name = args.name

    saved_files = []
    total = len(generated)
    for idx, item in enumerate(generated, start=1):
        out_path = _output_path(out_dir, base_name, idx, total)
        video_obj = item.video
        client.files.download(file=video_obj)
        video_obj.save(str(out_path))

        try:
            strip_audio(out_path)
        except subprocess.CalledProcessError as exc:
            # ffmpeg output was captured, so surface it explicitly.
            details = ""
            if exc.stderr:
                details = exc.stderr.decode("utf-8", errors="replace").strip()
            print(
                f"ffmpeg failed (exit {exc.returncode}) on {out_path}: {details}",
                file=sys.stderr,
            )
            return 3
        except OSError as exc:
            print(f"Could not strip audio from {out_path}: {exc}", file=sys.stderr)
            return 3

        saved_files.append(str(out_path.resolve()))
        print(f"Saved video: {out_path.resolve()}")

    metadata_path = out_dir / f"{base_name}.json"
    metadata = {
        "prompt": args.prompt,
        "model": args.model,
        "config": {
            "resolution": args.resolution,
            "duration_seconds": args.duration,
            "aspect_ratio": args.aspect_ratio,
            "number_of_videos": args.number_of_videos,
            "poll_seconds": args.poll_seconds,
        },
        "saved_videos": saved_files,
    }
    metadata_path.write_text(json.dumps(metadata, indent=2), encoding="utf-8")
    print(f"Saved metadata: {metadata_path.resolve()}")
    return 0


if __name__ == "__main__":
    raise SystemExit(main())