| |
| """ |
| Generate one image per row of lyrics from a text file. |
| Each line is used as the Chinese characters in the image generation prompt. |
| """ |
| import argparse |
| import subprocess |
| import sys |
| from pathlib import Path |
|
|
|
|
def parse_args(argv: "list[str] | None" = None) -> argparse.Namespace:
    """Parse the command-line options of the per-row lyrics image generator.

    Args:
        argv: Argument strings to parse, without the program name. When
            ``None`` (the default) argparse falls back to ``sys.argv[1:]``,
            so existing ``parse_args()`` callers behave exactly as before.
            Passing an explicit list makes the parser usable from other
            code and from tests.

    Returns:
        A namespace with the attributes ``lyrics_file``, ``input_image_path``,
        ``output_dir``, ``model``, ``aspect_ratio``, ``resolution``,
        ``extra_image_paths``, ``thinking_level`` and ``row_ids``.

    Raises:
        SystemExit: Raised by argparse itself when required options are
            missing or a value is invalid (after printing a usage message).
    """
    parser = argparse.ArgumentParser(
        description="Generate images for each line of lyrics from a text file."
    )

    # Several options accept both the dashed and the underscored spelling;
    # ``dest`` pins the attribute name regardless of which one is used.
    parser.add_argument(
        "--lyrics-file",
        "--lyrics_file",
        dest="lyrics_file",
        required=True,
        help="Path to the lyrics text file (one line per image).",
    )
    parser.add_argument(
        "--input-image-path",
        "--input_image_path",
        dest="input_image_path",
        required=True,
        help="Path to the primary conditioning image.",
    )
    parser.add_argument(
        "--output-dir",
        "--output_dir",
        dest="output_dir",
        default="output_dir",
        help="Directory to save outputs (default: output_dir).",
    )
    parser.add_argument(
        "--model",
        default="gemini-3.1-flash-image-preview",
        help="Image generation model name.",
    )
    parser.add_argument(
        "--aspect-ratio",
        default="16:9",
        help="Aspect ratio (e.g. 1:1, 16:9, 9:16).",
    )
    parser.add_argument(
        "--resolution",
        default="2K",
        help="Output resolution: 512px, 1K, 2K, or 4K.",
    )
    parser.add_argument(
        "--extra-image-paths",
        dest="extra_image_paths",
        nargs="*",
        default=[],
        help="Optional additional conditioning image paths.",
    )
    parser.add_argument(
        "--thinking-level",
        default=None,
        choices=["minimal", "high"],
        help="Thinking level for Gemini 3.1 Flash Image.",
    )
    # ``None`` (flag absent) means "all rows"; an explicit list restricts
    # generation to those 1-based row numbers.
    parser.add_argument(
        "--row-ids",
        "--row_ids",
        dest="row_ids",
        type=int,
        nargs="*",
        default=None,
        help="Specific row IDs to generate (1-based). If not set, generate all.",
    )
    return parser.parse_args(argv)
|
|
|
|
def build_prompt(chars: str) -> str:
    """Return the image-generation prompt that asks for ``chars`` to be drawn.

    The prompt is three newline-separated sentences: the replacement
    instruction containing ``chars`` in single quotes, the rendering
    constraints, and the font-matching instruction. It has no leading or
    trailing whitespace.
    """
    replace_instruction = f"Replace the chinese characters with '{chars}'."
    rendering_constraints = (
        "Black text on pure white background. "
        "The thickness of the strokes should be consistent with the original image. "
        "One character."
    )
    font_instruction = "Strictly follow the font of the original image."
    return "\n".join((replace_instruction, rendering_constraints, font_instruction))
|
|
|
|
def _load_rows(lyrics_path: Path) -> "list[tuple[int, str]]":
    """Return ``(row_id, text)`` for every non-blank line of the lyrics file."""
    # ``utf-8-sig`` decodes plain UTF-8 as well, but drops a leading BOM so
    # it cannot leak into the prompt of the first row.
    text = lyrics_path.read_text(encoding="utf-8-sig")

    rows = []
    # Enumerate the raw lines (not a stripped copy of the whole text) so a
    # row ID always equals the 1-based line number in the file, even when
    # the file starts with blank lines.
    for row_id, line in enumerate(text.splitlines(), start=1):
        chars = line.strip()
        if chars:
            rows.append((row_id, chars))
    return rows


def _build_command(
    args: argparse.Namespace,
    gen_script: Path,
    output_dir: Path,
    name: str,
    prompt: str,
) -> "list[str]":
    """Assemble the argv list that runs the generator script for one row."""
    cmd = [
        sys.executable,
        str(gen_script),
        "--prompt",
        prompt,
        "--input-image-path",
        args.input_image_path,
        "--output-dir",
        str(output_dir),
        "--name",
        name,
        "--model",
        args.model,
        "--aspect-ratio",
        args.aspect_ratio,
        "--resolution",
        args.resolution,
        "--number-of-images",
        "1",
    ]
    # Optional flags are only forwarded when the user supplied them.
    if args.extra_image_paths:
        cmd.extend(["--extra-image-paths"] + args.extra_image_paths)
    if args.thinking_level:
        cmd.extend(["--thinking-level", args.thinking_level])
    return cmd


def main() -> int:
    """Generate one image per non-blank lyrics line via the generator script.

    Reads the lyrics file, optionally restricts it to the rows selected with
    ``--row-ids``, and runs ``gen_image_image_cond.py`` (expected next to
    this file) once per row as a subprocess. Processing stops at the first
    row whose subprocess fails.

    Returns:
        ``0`` when every row succeeded, ``1`` for an input/validation error
        detected here, otherwise the non-zero return code of the failing
        subprocess.
    """
    args = parse_args()

    lyrics_path = Path(args.lyrics_file).expanduser().resolve()
    # ``is_file`` also rejects directories, which ``exists`` would let through.
    if not lyrics_path.is_file():
        print(f"Error: Lyrics file not found: {lyrics_path}", file=sys.stderr)
        return 1

    try:
        rows_to_generate = _load_rows(lyrics_path)
    except (OSError, UnicodeDecodeError) as exc:
        print(f"Error: Could not read lyrics file {lyrics_path}: {exc}", file=sys.stderr)
        return 1

    # Checked before the row filter so an empty file is reported as such
    # rather than as "no matching rows".
    if not rows_to_generate:
        print("Error: No non-empty lines in lyrics file.", file=sys.stderr)
        return 1

    if args.row_ids is not None:
        requested_ids = set(args.row_ids)
        rows_to_generate = [
            (row_id, chars)
            for row_id, chars in rows_to_generate
            if row_id in requested_ids
        ]
        if not rows_to_generate:
            print("Error: No matching rows found for the given row IDs.", file=sys.stderr)
            return 1
        # Surface typos in --row-ids instead of silently skipping them.
        unmatched_ids = sorted(requested_ids - {row_id for row_id, _ in rows_to_generate})
        if unmatched_ids:
            print(
                f"Warning: Skipping row IDs with no non-empty line: {unmatched_ids}",
                file=sys.stderr,
            )

    gen_script = Path(__file__).resolve().parent / "gen_image_image_cond.py"
    # Fail once, up front, instead of through a failing subprocess.
    if not gen_script.is_file():
        print(f"Error: Generator script not found: {gen_script}", file=sys.stderr)
        return 1

    output_dir = Path(args.output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    total = len(rows_to_generate)
    for idx, (row_id, chars) in enumerate(rows_to_generate, start=1):
        name = f"row_{row_id}"
        cmd = _build_command(args, gen_script, output_dir, name, build_prompt(chars))

        # Flush so this line appears before anything the child process
        # writes when stdout is piped or redirected (block-buffered).
        # NOTE(review): the ".png" path is what this script expects; the
        # actual file naming is decided by the generator script.
        print(
            f"[{idx}/{total}] Row {row_id}: '{chars}' -> {output_dir / f'{name}.png'}",
            flush=True,
        )
        result = subprocess.run(cmd)
        if result.returncode != 0:
            print(f"Error: Failed to generate image for row {row_id} ('{chars}')", file=sys.stderr)
            return result.returncode

    print(f"\nDone. Generated {total} images in {output_dir}")
    return 0
|
|
|
|
# Script entry point: run main() and use its return value as the process
# exit status.
if __name__ == "__main__":
    sys.exit(main())
|
|