Gemini-VideoGeneration / gen_lyrics_batch.py
LehongWu's picture
Upload folder using huggingface_hub
6cc3d86 verified
#!/usr/bin/env python3
"""
Generate one image per row of lyrics from a text file.
Each line is used as the Chinese characters in the image generation prompt.
"""
import argparse
import subprocess
import sys
from pathlib import Path
def parse_args(argv: "list[str] | None" = None) -> argparse.Namespace:
    """Parse the command-line options of the batch lyrics image generator.

    Every multi-word flag is accepted in both its dashed and its underscored
    spelling (e.g. ``--row-ids`` and ``--row_ids``).

    Args:
        argv: Argument strings to parse. ``None`` (the default) makes argparse
            read ``sys.argv[1:]``, which is what the script entry point uses.

    Returns:
        Namespace with the attributes ``lyrics_file``, ``input_image_path``,
        ``output_dir``, ``model``, ``aspect_ratio``, ``resolution``,
        ``extra_image_paths``, ``thinking_level`` and ``row_ids``.
    """
    parser = argparse.ArgumentParser(
        description="Generate images for each line of lyrics from a text file."
    )
    parser.add_argument(
        "--lyrics-file",
        "--lyrics_file",
        dest="lyrics_file",
        required=True,
        help="Path to the lyrics text file (one line per image).",
    )
    parser.add_argument(
        "--input-image-path",
        "--input_image_path",
        dest="input_image_path",
        required=True,
        help="Path to the primary conditioning image.",
    )
    parser.add_argument(
        "--output-dir",
        "--output_dir",
        dest="output_dir",
        default="output_dir",
        help="Directory to save outputs (default: output_dir).",
    )
    parser.add_argument(
        "--model",
        default="gemini-3.1-flash-image-preview",
        help="Image generation model name.",
    )
    parser.add_argument(
        "--aspect-ratio",
        "--aspect_ratio",
        dest="aspect_ratio",
        default="16:9",
        help="Aspect ratio (e.g. 1:1, 16:9, 9:16).",
    )
    parser.add_argument(
        "--resolution",
        default="2K",
        help="Output resolution: 512px, 1K, 2K, or 4K.",
    )
    parser.add_argument(
        "--extra-image-paths",
        "--extra_image_paths",
        dest="extra_image_paths",
        nargs="*",
        default=[],
        help="Optional additional conditioning image paths.",
    )
    parser.add_argument(
        "--thinking-level",
        "--thinking_level",
        dest="thinking_level",
        default=None,
        choices=["minimal", "high"],
        help="Thinking level for Gemini 3.1 Flash Image.",
    )
    parser.add_argument(
        "--row-ids",
        "--row_ids",
        dest="row_ids",
        type=int,
        nargs="*",
        default=None,
        help="Specific row IDs to generate (1-based). If not set, generate all.",
    )
    return parser.parse_args(argv)
def build_prompt(chars: str) -> str:
    """Return the image-generation prompt with *chars* substituted in.

    The prompt is three newline-separated instructions; *chars* is inserted
    verbatim between single quotes in the first one.
    """
    instructions = (
        f"Replace the chinese characters with '{chars}'.",
        "Black text on pure white background. The thickness of the strokes should be consistent with the original image. One character.",
        "Strictly follow the font of the original image.",
    )
    return "\n".join(instructions)
def main() -> int:
    """Generate one image per selected lyrics row via the generator script.

    Reads the lyrics file, keeps its non-blank rows (optionally restricted to
    ``--row-ids``) and runs ``gen_image_image_cond.py`` -- expected in the same
    directory as this file -- once per row, stopping at the first failure.

    Returns:
        0 when every row was generated, 1 for unusable input (missing lyrics
        file, missing generator script, no rows to generate), otherwise the
        non-zero exit code of the first generator run that failed.
    """
    args = parse_args()

    lyrics_path = Path(args.lyrics_file).expanduser().resolve()
    # is_file() also rejects directories, which would otherwise crash read_text().
    if not lyrics_path.is_file():
        print(f"Error: Lyrics file not found: {lyrics_path}", file=sys.stderr)
        return 1

    # Split the raw text without stripping it first: a row ID is the 1-based
    # line number in the file, so leading blank lines must still be counted.
    # "utf-8-sig" drops a leading BOM so it cannot leak into the first prompt.
    lines = lyrics_path.read_text(encoding="utf-8-sig").splitlines()
    rows_to_generate = [
        (row_id, line.strip())
        for row_id, line in enumerate(lines, start=1)
        if line.strip()
    ]
    if not rows_to_generate:
        print("Error: No non-empty lines in lyrics file.", file=sys.stderr)
        return 1

    if args.row_ids is not None:
        requested = set(args.row_ids)
        rows_to_generate = [row for row in rows_to_generate if row[0] in requested]
        if not rows_to_generate:
            print("Error: No matching rows found for the given row IDs.", file=sys.stderr)
            return 1
        # Surface typos in --row-ids instead of silently generating fewer images.
        unmatched = sorted(requested - {row_id for row_id, _ in rows_to_generate})
        if unmatched:
            print(
                f"Warning: Ignoring row IDs without a non-empty line: {unmatched}",
                file=sys.stderr,
            )

    gen_script = Path(__file__).resolve().parent / "gen_image_image_cond.py"
    if not gen_script.is_file():
        print(f"Error: Generator script not found: {gen_script}", file=sys.stderr)
        return 1

    output_dir = Path(args.output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    total = len(rows_to_generate)
    for idx, (row_id, chars) in enumerate(rows_to_generate, start=1):
        name = f"row_{row_id}"
        cmd = [
            sys.executable,
            str(gen_script),
            "--prompt",
            build_prompt(chars),
            "--input-image-path",
            args.input_image_path,
            "--output-dir",
            str(output_dir),
            "--name",
            name,
            "--model",
            args.model,
            "--aspect-ratio",
            args.aspect_ratio,
            "--resolution",
            args.resolution,
            "--number-of-images",
            "1",
        ]
        if args.extra_image_paths:
            cmd.extend(["--extra-image-paths"] + args.extra_image_paths)
        if args.thinking_level:
            cmd.extend(["--thinking-level", args.thinking_level])

        # Flush so the progress line is emitted before the child process
        # writes its own output when stdout is buffered (e.g. piped to a file).
        print(
            f"[{idx}/{total}] Row {row_id}: '{chars}' -> {output_dir / f'{name}.png'}",
            flush=True,
        )
        result = subprocess.run(cmd)
        if result.returncode != 0:
            print(f"Error: Failed to generate image for row {row_id} ('{chars}')", file=sys.stderr)
            return result.returncode

    print(f"\nDone. Generated {total} images in {output_dir}")
    return 0
# Script entry point: hand main()'s return value to the shell as the exit status.
if __name__ == "__main__":
    sys.exit(main())