Refactor src layout and add logging lifecycle + tests

2026-04-02 12:32:02 +02:00
parent e3c2b9ddee
commit a0a66264d2
13 changed files with 172 additions and 202 deletions
--- a/src/generate_videos.py
+++ b/src/generate_videos.py
@@ -0,0 +1,178 @@
+#!/usr/bin/env python3
+"""Generate shot videos with HunyuanVideo based on reel script and audio durations."""
+
+from __future__ import annotations
+
+import argparse
+import json
+import logging
+import os
+import subprocess
+from pathlib import Path
+
+from logging_config import configure_logging, debug_log_lifecycle
+
+
+# Directory that contains this script (symlinks resolved).
+SCRIPT_DIR = Path(__file__).resolve().parent
+# Root under which every default input/output location below is looked up:
+# the directory two levels above SCRIPT_DIR.
+# NOTE(review): with this file at <repo>/src/, parents[1] is the directory
+# that *contains* the repo, not the repo root (that would be parents[0]) --
+# confirm this is the intended layout.
+DEFAULT_BASE_DIR = SCRIPT_DIR.parents[1]
+# HunyuanVideo checkout; main() runs generate.py from here and reads ckpts/.
+DEFAULT_HUNYUAN_DIR = DEFAULT_BASE_DIR / "HunyuanVideo-1.5"
+# JSON file whose "shots" list drives the generation loop.
+DEFAULT_REEL_SCRIPT = DEFAULT_BASE_DIR / "reel_script.json"
+# Input stills, looked up as shot_<n>.png.
+DEFAULT_IMAGES_DIR = DEFAULT_BASE_DIR / "images"
+# Generated clips, written as output_<n>.mp4.
+DEFAULT_VIDEOS_DIR = DEFAULT_BASE_DIR / "videos"
+# Per-shot audio, looked up as output_<n>.mp3 to size each clip.
+DEFAULT_AUDIOS_DIR = DEFAULT_BASE_DIR / "audios"
+
+# Module-level logger; handlers and level are set by configure_logging().
+LOGGER = logging.getLogger(__name__)
+
+
+def parse_args() -> argparse.Namespace:
+    """Parse the command-line options for the video generation script.
+
+    Every directory/file option is converted to a ``Path`` and falls back to
+    the matching ``DEFAULT_*`` module constant. ``--seed`` is an int
+    (default 1) and ``--log-level`` is left as ``None`` when not given.
+    """
+    # (flag, default) pairs, registered in this order so --help is stable.
+    path_options = (
+        ("--hunyuan-dir", DEFAULT_HUNYUAN_DIR),
+        ("--reel-script", DEFAULT_REEL_SCRIPT),
+        ("--images-dir", DEFAULT_IMAGES_DIR),
+        ("--videos-dir", DEFAULT_VIDEOS_DIR),
+        ("--audios-dir", DEFAULT_AUDIOS_DIR),
+    )
+
+    cli = argparse.ArgumentParser(description=__doc__)
+    for flag, fallback in path_options:
+        cli.add_argument(flag, type=Path, default=fallback)
+    cli.add_argument("--seed", type=int, default=1)
+    cli.add_argument(
+        "--log-level",
+        default=None,
+        help="Logging level (overrides LOG_LEVEL env var)",
+    )
+    return cli.parse_args()
+
+
+@debug_log_lifecycle
+def get_audio_duration(audio_path: Path) -> float:
+    """Return the duration ``ffprobe`` reports for *audio_path*, as a float.
+
+    ``ffprobe`` is asked for the container-level ``duration`` entry only,
+    printed without key or wrapper, so stdout is just the number.
+
+    Raises:
+        subprocess.CalledProcessError: ``ffprobe`` exited with a non-zero code.
+        ValueError: the printed value could not be parsed as a float.
+    """
+    probe_cmd = [
+        "ffprobe",
+        "-v", "error",
+        "-show_entries", "format=duration",
+        "-of", "default=noprint_wrappers=1:nokey=1",
+        str(audio_path),
+    ]
+    completed = subprocess.run(
+        probe_cmd,
+        capture_output=True,
+        text=True,
+        check=True,
+    )
+    raw_duration = completed.stdout.strip()
+    return float(raw_duration)
+
+
+@debug_log_lifecycle
+def duration_to_video_length(
+    duration: float,
+    *,
+    fps: int = 24,
+    min_frames: int = 49,
+    max_frames: int = 169,
+) -> int:
+    """Convert a duration into a frame count for the video generator.
+
+    The count is ``int(duration * fps) + 1``, bumped to the next odd number
+    when even, then clamped to ``[min_frames, max_frames]``.
+
+    Args:
+        duration: Clip length, in the time unit that ``fps`` is expressed per.
+        fps: Frames per unit of ``duration`` (default 24).
+        min_frames: Lower clamp applied to the result (default 49).
+        max_frames: Upper clamp applied to the result (default 169).
+
+    Returns:
+        The clamped frame count. With the default (odd) bounds the result is
+        always odd; an even custom bound can produce an even result because
+        clamping happens after the odd adjustment.
+    """
+    frames = int(duration * fps) + 1
+    if frames % 2 == 0:
+        frames += 1
+    return max(min_frames, min(frames, max_frames))
+
+
+def _build_generate_command(
+    prompt: str,
+    image_path: Path,
+    output_path: Path,
+    model_path: Path,
+    seed: int,
+    video_length: int,
+) -> list[str]:
+    """Return the ``torchrun generate.py`` argument list for one shot."""
+    return [
+        "torchrun",
+        "--nproc_per_node=1",
+        "generate.py",
+        "--prompt",
+        prompt,
+        "--image_path",
+        str(image_path),
+        "--resolution",
+        "480p",
+        "--aspect_ratio",
+        "16:9",
+        "--seed",
+        str(seed),
+        "--video_length",
+        str(video_length),
+        "--rewrite",
+        "false",
+        "--cfg_distilled",
+        "true",
+        "--enable_step_distill",
+        "true",
+        "--sparse_attn",
+        "false",
+        "--use_sageattn",
+        "true",
+        "--enable_cache",
+        "false",
+        "--overlap_group_offloading",
+        "true",
+        "--sr",
+        "false",
+        "--output_path",
+        str(output_path),
+        "--model_path",
+        str(model_path),
+    ]
+
+
+@debug_log_lifecycle
+def main() -> int:
+    """Generate one video per shot listed in the reel script.
+
+    For each entry of the script's ``"shots"`` list the frame count is derived
+    from the matching audio file (5 s fallback when the audio is missing).
+    Shots whose output file already exists, or whose input image is missing,
+    are skipped. Otherwise ``generate.py`` is run through ``torchrun`` from
+    the HunyuanVideo directory.
+
+    Returns:
+        0 when the loop completes.
+
+    Raises:
+        subprocess.CalledProcessError: a child process exited non-zero.
+        KeyError: a shot lacks ``shot_number`` or ``image_description``.
+    """
+    args = parse_args()
+    configure_logging(args.log_level)
+
+    # generate.py runs with cwd=hunyuan_dir, so a relative path would point
+    # somewhere else in the child than in the existence checks below. Make
+    # every directory absolute once, before any of them is used.
+    hunyuan_dir = args.hunyuan_dir.resolve()
+    images_dir = args.images_dir.resolve()
+    videos_dir = args.videos_dir.resolve()
+    audios_dir = args.audios_dir.resolve()
+    model_path = hunyuan_dir / "ckpts"
+
+    videos_dir.mkdir(parents=True, exist_ok=True)
+
+    env = os.environ.copy()
+    env["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True,max_split_size_mb:128"
+
+    # Read as UTF-8 explicitly instead of relying on the platform default.
+    data = json.loads(args.reel_script.read_text(encoding="utf-8"))
+    shots = data.get("shots", [])
+    LOGGER.info("Found %s shots to generate", len(shots))
+
+    for shot in shots:
+        shot_number = shot["shot_number"]
+        # Flatten the description to a single line before passing it on.
+        prompt = str(shot["image_description"]).replace("\t", " ").replace("\n", " ")
+
+        image_path = images_dir / f"shot_{shot_number}.png"
+        output_path = videos_dir / f"output_{shot_number}.mp4"
+        audio_path = audios_dir / f"output_{shot_number}.mp3"
+
+        if not audio_path.exists():
+            LOGGER.warning("No audio found at %s, falling back to 5s default", audio_path)
+            duration = 5.0
+        else:
+            duration = get_audio_duration(audio_path)
+            LOGGER.info("Audio duration for shot %s: %ss", shot_number, duration)
+
+        video_length = duration_to_video_length(duration)
+
+        LOGGER.info("Shot %s | %ss -> %s frames", shot_number, duration, video_length)
+        LOGGER.info("Prompt: %s", prompt)
+        LOGGER.info("Image: %s", image_path)
+        LOGGER.info("Audio: %s", audio_path)
+        LOGGER.info("Output: %s", output_path)
+
+        if output_path.exists():
+            LOGGER.info("Output path already exists, skipping")
+            continue
+
+        if not image_path.exists():
+            LOGGER.warning("Image not found at %s, skipped", image_path)
+            continue
+
+        # NOTE(review): this runs in its own short-lived interpreter, so it
+        # presumably cannot release memory held by any other process --
+        # confirm whether the step is still needed.
+        subprocess.run(
+            [
+                "python3",
+                "-c",
+                "import torch; torch.cuda.empty_cache()",
+            ],
+            check=True,
+            env=env,
+        )
+        LOGGER.info("GPU cache cleared")
+
+        subprocess.run(
+            _build_generate_command(
+                prompt,
+                image_path,
+                output_path,
+                model_path,
+                args.seed,
+                video_length,
+            ),
+            check=True,
+            cwd=hunyuan_dir,
+            env=env,
+        )
+
+        LOGGER.info("Shot %s done", shot_number)
+
+    LOGGER.info("Done")
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())