Spaces:
Sleeping
Sleeping
| #!/usr/bin/env python3 | |
| """ | |
| Generate one image per row of lyrics from a text file. | |
| Each line is used as the Chinese characters in the image generation prompt. | |
| """ | |
| import argparse | |
| import subprocess | |
| import sys | |
| from pathlib import Path | |
def parse_args(argv=None) -> argparse.Namespace:
    """Parse the command-line options of the per-row lyric image generator.

    Every multi-word option is accepted in both its hyphenated and its
    underscored spelling (``--aspect-ratio`` / ``--aspect_ratio``), so the
    flags behave uniformly.

    Args:
        argv: Sequence of argument strings to parse. ``None`` (the default)
            makes argparse read ``sys.argv[1:]``, i.e. the normal
            command-line behaviour.

    Returns:
        Namespace with the attributes ``lyrics_file``, ``input_image_path``,
        ``output_dir``, ``model``, ``aspect_ratio``, ``resolution``,
        ``extra_image_paths``, ``thinking_level`` and ``row_ids``.
    """
    parser = argparse.ArgumentParser(
        description="Generate images for each line of lyrics from a text file."
    )
    parser.add_argument(
        "--lyrics-file",
        "--lyrics_file",
        dest="lyrics_file",
        required=True,
        help="Path to the lyrics text file (one line per image).",
    )
    parser.add_argument(
        "--input-image-path",
        "--input_image_path",
        dest="input_image_path",
        required=True,
        help="Path to the primary conditioning image.",
    )
    parser.add_argument(
        "--output-dir",
        "--output_dir",
        dest="output_dir",
        default="output_dir",
        help="Directory to save outputs (default: output_dir).",
    )
    parser.add_argument(
        "--model",
        default="gemini-3.1-flash-image-preview",
        help="Image generation model name.",
    )
    parser.add_argument(
        "--aspect-ratio",
        "--aspect_ratio",
        dest="aspect_ratio",
        default="16:9",
        help="Aspect ratio (e.g. 1:1, 16:9, 9:16).",
    )
    parser.add_argument(
        "--resolution",
        default="2K",
        help="Output resolution: 512px, 1K, 2K, or 4K.",
    )
    parser.add_argument(
        "--extra-image-paths",
        "--extra_image_paths",
        dest="extra_image_paths",
        nargs="*",
        default=[],
        help="Optional additional conditioning image paths.",
    )
    parser.add_argument(
        "--thinking-level",
        "--thinking_level",
        dest="thinking_level",
        default=None,
        choices=["minimal", "high"],
        help="Thinking level for Gemini 3.1 Flash Image.",
    )
    parser.add_argument(
        "--row-ids",
        "--row_ids",
        dest="row_ids",
        type=int,
        nargs="*",
        default=None,
        help="Specific row IDs to generate (1-based). If not set, generate all.",
    )
    return parser.parse_args(argv)
def build_prompt(chars: str) -> str:
    """Return the image-generation prompt that asks for *chars* to be rendered.

    The prompt consists of three newline-separated instructions; *chars* is
    inserted, single-quoted, into the first one.
    """
    instructions = (
        f"Replace the chinese characters with '{chars}'.",
        "Black text on pure white background. The thickness of the strokes should be consistent with the original image. One character.",
        "Strictly follow the font of the original image.",
    )
    return "\n".join(instructions)
def main() -> int:
    """Generate one image per non-empty lyrics line.

    Reads the lyrics file, pairs every non-blank line with its 1-based line
    number in the file (its row ID), optionally narrows the selection to
    ``--row-ids``, and runs ``gen_image_image_cond.py`` (looked up in this
    script's directory) once per selected row with ``--name row_<row_id>``.
    Processing stops at the first generator run that fails.

    Returns:
        0 when every selected row was generated; 1 when the lyrics file is
        missing, unreadable, has no non-blank line, or no requested row ID
        matches; otherwise the non-zero return code of the failed generator
        run.
    """
    args = parse_args()

    lyrics_path = Path(args.lyrics_file).expanduser().resolve()
    # is_file() instead of exists(): a directory would pass exists() and then
    # blow up inside read_text().
    if not lyrics_path.is_file():
        print(f"Error: Lyrics file not found: {lyrics_path}", file=sys.stderr)
        return 1

    try:
        # "utf-8-sig" drops a leading byte-order mark that would otherwise end
        # up inside the first row's prompt; BOM-less UTF-8 reads the same.
        text = lyrics_path.read_text(encoding="utf-8-sig")
    except (OSError, UnicodeDecodeError) as exc:
        print(f"Error: Could not read lyrics file {lyrics_path}: {exc}", file=sys.stderr)
        return 1

    # Split the raw text as-is. Stripping the whole text first would delete
    # leading blank lines and shift every row ID away from the real line
    # number in the file, which is what --row-ids refers to.
    rows_to_generate = [
        (row_id, line.strip())
        for row_id, line in enumerate(text.splitlines(), start=1)
        if line.strip()
    ]
    if not rows_to_generate:
        print("Error: No non-empty lines in lyrics file.", file=sys.stderr)
        return 1

    if args.row_ids is not None:
        requested = set(args.row_ids)
        rows_to_generate = [row for row in rows_to_generate if row[0] in requested]
        if not rows_to_generate:
            print("Error: No matching rows found for the given row IDs.", file=sys.stderr)
            return 1
        # Tell the user about requested IDs that point at blank or missing
        # lines instead of dropping them silently.
        unmatched = sorted(requested - {row_id for row_id, _ in rows_to_generate})
        if unmatched:
            print(
                f"Warning: Ignoring row IDs with no non-empty line: {unmatched}",
                file=sys.stderr,
            )

    output_dir = Path(args.output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    # The generator script is expected to sit next to this file.
    gen_script = Path(__file__).resolve().parent / "gen_image_image_cond.py"

    total = len(rows_to_generate)
    for idx, (row_id, chars) in enumerate(rows_to_generate, start=1):
        name = f"row_{row_id}"
        cmd = [
            sys.executable,
            str(gen_script),
            "--prompt",
            build_prompt(chars),
            "--input-image-path",
            args.input_image_path,
            "--output-dir",
            str(output_dir),
            "--name",
            name,
            "--model",
            args.model,
            "--aspect-ratio",
            args.aspect_ratio,
            "--resolution",
            args.resolution,
            "--number-of-images",
            "1",
        ]
        if args.extra_image_paths:
            cmd.extend(["--extra-image-paths"] + args.extra_image_paths)
        if args.thinking_level:
            cmd.extend(["--thinking-level", args.thinking_level])

        # NOTE(review): the ".png" path is only what this script announces;
        # the actual file name is chosen by the generator script - confirm.
        target = output_dir / f"{name}.png"
        print(f"[{idx}/{total}] Row {row_id}: '{chars}' -> {target}")

        result = subprocess.run(cmd)
        if result.returncode != 0:
            print(f"Error: Failed to generate image for row {row_id} ('{chars}')", file=sys.stderr)
            return result.returncode

    print(f"\nDone. Generated {total} images in {output_dir}")
    return 0
if __name__ == "__main__":
    # Hand main()'s integer status to the interpreter as the exit code.
    sys.exit(main())