Add skip-audio mode and resilient merge handling

2026-04-02 13:25:54 +02:00
parent a0a66264d2
commit 08ebab6348
3 changed files with 56 additions and 21 deletions
--- a/README.md
+++ b/README.md
@@ -24,7 +24,10 @@ Docker (GPU):
 ```bash
 cp .env.example .env
 docker build -t content-generation:latest .
-docker run --rm --gpus all --env-file .env -v "$(pwd)":/app -w /app content-generation:latest
+docker run --rm --gpus all --env-file .env \
+  -v "$(pwd)":/app \
+  -v "$HOME/.cache/huggingface":/root/.cache/huggingface \
+  -w /app content-generation:latest
 ```
 First run (skip S3 upload):
@@ -36,7 +39,11 @@ python run_video_pipeline.py --skip-s3-upload
 Docker first run (skip S3 upload):
 ```bash
-docker run --rm --gpus all --env-file .env -v "$(pwd)":/app -w /app content-generation:latest \
+docker run --rm --gpus all --env-file .env \
+  -v "$(pwd)":/app \
+  -v "$HOME/.cache/huggingface":/root/.cache/huggingface \
+  -w /app \
+  content-generation:latest \
  python run_video_pipeline.py --skip-s3-upload
 ```
@@ -136,6 +143,7 @@ docker build -t content-generation:latest --build-arg INSTALL_OPTIONAL_ATTENTION
 docker run --rm --gpus all \
  --env-file .env \
  -v "$(pwd)":/app \
+  -v "$HOME/.cache/huggingface":/root/.cache/huggingface \
  -w /app \
  content-generation:latest
 ```
@@ -146,6 +154,7 @@ docker run --rm --gpus all \
 docker run --rm --gpus all \
  --env-file .env \
  -v "$(pwd)":/app \
+  -v "$HOME/.cache/huggingface":/root/.cache/huggingface \
  -w /app \
  content-generation:latest \
  python run_video_pipeline.py --skip-s3-upload --log-level DEBUG
--- a/run_video_pipeline.py
+++ b/run_video_pipeline.py
@@ -40,6 +40,7 @@ def parse_args() -> argparse.Namespace:
    parser.add_argument("--output", type=Path, default=DEFAULT_OUTPUT)
    parser.add_argument("--seed", type=int, default=1)
    parser.add_argument("--skip-generate", action="store_true")
+    parser.add_argument("--skip-audio-generate", action="store_true")
    parser.add_argument("--skip-merge", action="store_true")
    parser.add_argument("--skip-concat", action="store_true")
    parser.add_argument("--skip-s3-upload", action="store_true")
@@ -123,6 +124,16 @@ def main() -> int:
                LOGGER.error("Reel script was not generated at %s", args.reel_script)
                return 1
+        if not args.skip_generate and not args.skip_audio_generate:
+            run_step(
+                "Generate Audios",
+                _with_log_level([
+                    sys.executable,
+                    str(SCRIPT_DIR / "generate_audios.py"),
+                ], args.log_level),
+                cwd=args.base_dir,
+            )
        if not args.skip_generate:
            run_step(
                "Generate Videos",
@@ -145,18 +156,22 @@ def main() -> int:
            )
        if not args.skip_merge:
+            merge_cmd = [
+                sys.executable,
+                str(SCRIPT_DIR / "merge_audio_video.py"),
+                "--videos-dir",
+                str(args.videos_dir),
+                "--audios-dir",
+                str(args.audios_dir),
+                "--output-dir",
+                str(args.merged_dir),
+            ]
+            if args.skip_audio_generate:
+                merge_cmd.append("--allow-missing-audio")
            run_step(
                "Merge Audio + Video",
-                _with_log_level([
+                _with_log_level(merge_cmd, args.log_level),
-                    sys.executable,
-                    str(SCRIPT_DIR / "merge_audio_video.py"),
-                    "--videos-dir",
-                    str(args.videos_dir),
-                    "--audios-dir",
-                    str(args.audios_dir),
-                    "--output-dir",
-                    str(args.merged_dir),
-                ], args.log_level),
            )
        if not args.skip_concat:
--- a/src/merge_audio_video.py
+++ b/src/merge_audio_video.py
@@ -6,10 +6,11 @@ from __future__ import annotations
 import argparse
 import logging
 import re
 import shutil
 import subprocess
 from pathlib import Path
-from logging_config import configure_logging, debug_log_lifecycle
+from logging_config import configure_logging
 SCRIPT_DIR = Path(__file__).resolve().parent
@@ -32,14 +33,14 @@ def parse_args() -> argparse.Namespace:
    parser.add_argument("--audios-dir", type=Path, default=DEFAULT_AUDIOS_DIR)
    parser.add_argument("--output-dir", type=Path, default=DEFAULT_OUTPUT_DIR)
    parser.add_argument(
-        "--log-level",
+        "--allow-missing-audio",
-        default=None,
+        action="store_true",
-        help="Logging level (overrides LOG_LEVEL env var)",
+        help="If set, create merged output from video only when audio is missing.",
    )
+    parser.add_argument("--log-level", default="INFO")
    return parser.parse_args()
-@debug_log_lifecycle
 def main() -> int:
    args = parse_args()
    configure_logging(args.log_level)
@@ -55,14 +56,24 @@ def main() -> int:
        audio = args.audios_dir / f"output_{num}.mp3"
        output = args.output_dir / f"merged_{num}.mp4"
-        if not audio.exists():
-            LOGGER.warning("No audio found for shot %s (%s); skipped", num, audio)
-            continue
        if output.exists():
            LOGGER.info("Already exists; skipped shot %s", num)
            continue
+        if not audio.exists():
+            if args.allow_missing_audio:
+                LOGGER.warning(
+                    "No audio found for shot %s (%s); using video-only output",
+                    num,
+                    audio,
+                )
+                shutil.copy2(video, output)
+                LOGGER.info("Done (video-only): %s", output)
+                continue
+            LOGGER.warning("No audio found for shot %s (%s); skipped", num, audio)
+            continue
        LOGGER.info("Merging shot %s: %s + %s -> %s", num, video, audio, output)
        subprocess.run(
            [