#!/usr/bin/env python3
"""
Generate one image per row of lyrics from a text file.

Each line is used as the Chinese characters in the image generation prompt.
"""

import argparse
import subprocess
import sys
from pathlib import Path
from typing import List, Optional, Sequence, Tuple

# File name of the generator script, looked up next to this file.
GEN_SCRIPT_NAME = "gen_image_image_cond.py"


def parse_args(argv: Optional[Sequence[str]] = None) -> argparse.Namespace:
    """Parse command-line options.

    Args:
        argv: Argument list to parse. ``None`` (the default) makes argparse
            read ``sys.argv[1:]``.

    Returns:
        The populated namespace.
    """
    parser = argparse.ArgumentParser(
        description="Generate images for each line of lyrics from a text file."
    )
    parser.add_argument(
        "--lyrics-file",
        "--lyrics_file",
        dest="lyrics_file",
        required=True,
        help="Path to the lyrics text file (one line per image).",
    )
    parser.add_argument(
        "--input-image-path",
        "--input_image_path",
        dest="input_image_path",
        required=True,
        help="Path to the primary conditioning image.",
    )
    parser.add_argument(
        "--output-dir",
        "--output_dir",
        dest="output_dir",
        default="output_dir",
        help="Directory to save outputs (default: output_dir).",
    )
    parser.add_argument(
        "--model",
        default="gemini-3.1-flash-image-preview",
        help="Image generation model name.",
    )
    parser.add_argument(
        "--aspect-ratio",
        default="16:9",
        help="Aspect ratio (e.g. 1:1, 16:9, 9:16).",
    )
    parser.add_argument(
        "--resolution",
        default="2K",
        help="Output resolution: 512px, 1K, 2K, or 4K.",
    )
    parser.add_argument(
        "--extra-image-paths",
        dest="extra_image_paths",
        nargs="*",
        default=[],
        help="Optional additional conditioning image paths.",
    )
    parser.add_argument(
        "--thinking-level",
        default=None,
        choices=["minimal", "high"],
        help="Thinking level for Gemini 3.1 Flash Image.",
    )
    parser.add_argument(
        "--row-ids",
        "--row_ids",
        dest="row_ids",
        type=int,
        nargs="*",
        default=None,
        help="Specific row IDs to generate (1-based). If not set, generate all.",
    )
    return parser.parse_args(argv)


def build_prompt(chars: str) -> str:
    """Build the image generation prompt for the given Chinese characters."""
    return (
        f"Replace the chinese characters with '{chars}'. "
        "Black text on pure white background. "
        "The thickness of the strokes should be consistent with the original image. "
        "One character. "
        "Strictly follow the font of the original image."
    )


def _load_rows(lyrics_path: Path) -> List[Tuple[int, str]]:
    """Return ``(row_id, text)`` for every non-blank line of the lyrics file.

    ``row_id`` is the 1-based line number in the file, so it stays stable
    regardless of blank lines and can be used for selective generation.
    """
    # utf-8-sig drops a leading BOM if present; otherwise it behaves like
    # utf-8. A BOM is not whitespace, so strip() would leave it in row 1.
    text = lyrics_path.read_text(encoding="utf-8-sig")

    rows: List[Tuple[int, str]] = []
    # Split the raw text: stripping it first would remove leading blank
    # lines and shift every row_id away from its real line number.
    for row_id, raw_line in enumerate(text.splitlines(), start=1):
        chars = raw_line.strip()
        if chars:
            rows.append((row_id, chars))
    return rows


def main(argv: Optional[Sequence[str]] = None) -> int:
    """Generate one image per selected lyrics row via the generator script.

    Args:
        argv: Argument list forwarded to :func:`parse_args`; ``None`` uses
            ``sys.argv[1:]``.

    Returns:
        ``0`` on success, ``1`` for input errors detected here, or the
        generator script's non-zero return code for the first row that fails.
    """
    args = parse_args(argv)

    lyrics_path = Path(args.lyrics_file).expanduser().resolve()
    # is_file() rather than exists(): a directory cannot be read as lyrics.
    if not lyrics_path.is_file():
        print(f"Error: Lyrics file not found: {lyrics_path}", file=sys.stderr)
        return 1

    try:
        rows_to_generate = _load_rows(lyrics_path)
    except (OSError, UnicodeDecodeError) as exc:
        print(f"Error: Could not read lyrics file {lyrics_path}: {exc}", file=sys.stderr)
        return 1

    if not rows_to_generate:
        print("Error: No non-empty lines in lyrics file.", file=sys.stderr)
        return 1

    if args.row_ids is not None:
        requested = set(args.row_ids)
        available = {row_id for row_id, _ in rows_to_generate}
        rows_to_generate = [row for row in rows_to_generate if row[0] in requested]
        if not rows_to_generate:
            print("Error: No matching rows found for the given row IDs.", file=sys.stderr)
            return 1
        # Report requested IDs that were dropped instead of ignoring them silently.
        missing = sorted(requested - available)
        if missing:
            missing_text = ", ".join(str(row_id) for row_id in missing)
            print(
                f"Warning: Skipping row IDs that are blank or out of range: {missing_text}",
                file=sys.stderr,
            )

    # Check for the generator before creating the output directory, so a
    # missing script gives a clear error and leaves nothing behind.
    gen_script = Path(__file__).resolve().parent / GEN_SCRIPT_NAME
    if not gen_script.is_file():
        print(f"Error: Generator script not found: {gen_script}", file=sys.stderr)
        return 1

    output_dir = Path(args.output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    total = len(rows_to_generate)
    for idx, (row_id, chars) in enumerate(rows_to_generate, start=1):
        name = f"row_{row_id}"
        cmd = [
            sys.executable,
            str(gen_script),
            "--prompt", build_prompt(chars),
            "--input-image-path", args.input_image_path,
            "--output-dir", str(output_dir),
            "--name", name,
            "--model", args.model,
            "--aspect-ratio", args.aspect_ratio,
            "--resolution", args.resolution,
            "--number-of-images", "1",
        ]
        if args.extra_image_paths:
            cmd.extend(["--extra-image-paths", *args.extra_image_paths])
        if args.thinking_level:
            cmd.extend(["--thinking-level", args.thinking_level])

        print(f"[{idx}/{total}] Row {row_id}: '{chars}' -> {output_dir / f'{name}.png'}")
        # Argument list, no shell: lyrics text is never interpreted by a shell.
        result = subprocess.run(cmd, check=False)
        if result.returncode != 0:
            print(
                f"Error: Failed to generate image for row {row_id} ('{chars}')",
                file=sys.stderr,
            )
            # Stop at the first failure and propagate the child's exit status.
            return result.returncode

    print(f"\nDone. Generated {total} images in {output_dir}")
    return 0


if __name__ == "__main__":
    raise SystemExit(main())