diff --git a/README.md b/README.md index 79fa316..1b842b0 100644 --- a/README.md +++ b/README.md @@ -24,7 +24,10 @@ Docker (GPU): ```bash cp .env.example .env docker build -t content-generation:latest . -docker run --rm --gpus all --env-file .env -v "$(pwd)":/app -w /app content-generation:latest +docker run --rm --gpus all --env-file .env \ + -v "$(pwd)":/app \ + -v "$HOME/.cache/huggingface":/root/.cache/huggingface \ + -w /app content-generation:latest ``` First run (skip S3 upload): @@ -36,7 +39,11 @@ python run_video_pipeline.py --skip-s3-upload Docker first run (skip S3 upload): ```bash -docker run --rm --gpus all --env-file .env -v "$(pwd)":/app -w /app content-generation:latest \ +docker run --rm --gpus all --env-file .env \ + -v "$(pwd)":/app \ + -v "$HOME/.cache/huggingface":/root/.cache/huggingface \ + -w /app \ + content-generation:latest \ python run_video_pipeline.py --skip-s3-upload ``` @@ -136,6 +143,7 @@ docker build -t content-generation:latest --build-arg INSTALL_OPTIONAL_ATTENTION docker run --rm --gpus all \ --env-file .env \ -v "$(pwd)":/app \ + -v "$HOME/.cache/huggingface":/root/.cache/huggingface \ -w /app \ content-generation:latest ``` @@ -146,6 +154,7 @@ docker run --rm --gpus all \ docker run --rm --gpus all \ --env-file .env \ -v "$(pwd)":/app \ + -v "$HOME/.cache/huggingface":/root/.cache/huggingface \ -w /app \ content-generation:latest \ python run_video_pipeline.py --skip-s3-upload --log-level DEBUG diff --git a/run_video_pipeline.py b/run_video_pipeline.py index 6e0bc25..6155997 100644 --- a/run_video_pipeline.py +++ b/run_video_pipeline.py @@ -40,6 +40,7 @@ def parse_args() -> argparse.Namespace: parser.add_argument("--output", type=Path, default=DEFAULT_OUTPUT) parser.add_argument("--seed", type=int, default=1) parser.add_argument("--skip-generate", action="store_true") + parser.add_argument("--skip-audio-generate", action="store_true") parser.add_argument("--skip-merge", action="store_true") parser.add_argument("--skip-concat", action="store_true") parser.add_argument("--skip-s3-upload", action="store_true") @@ -123,6 +124,16 @@ def main() -> int: LOGGER.error("Reel script was not generated at %s", args.reel_script) return 1 + if not args.skip_generate and not args.skip_audio_generate: + run_step( + "Generate Audios", + _with_log_level([ + sys.executable, + str(SCRIPT_DIR / "generate_audios.py"), + ], args.log_level), + cwd=args.base_dir, + ) + if not args.skip_generate: run_step( "Generate Videos", @@ -145,18 +156,22 @@ def main() -> int: ) if not args.skip_merge: + merge_cmd = [ + sys.executable, + str(SCRIPT_DIR / "merge_audio_video.py"), + "--videos-dir", + str(args.videos_dir), + "--audios-dir", + str(args.audios_dir), + "--output-dir", + str(args.merged_dir), + ] + if args.skip_audio_generate: + merge_cmd.append("--allow-missing-audio") + run_step( "Merge Audio + Video", - _with_log_level([ - sys.executable, - str(SCRIPT_DIR / "merge_audio_video.py"), - "--videos-dir", - str(args.videos_dir), - "--audios-dir", - str(args.audios_dir), - "--output-dir", - str(args.merged_dir), - ], args.log_level), + _with_log_level(merge_cmd, args.log_level), ) if not args.skip_concat: diff --git a/src/merge_audio_video.py b/src/merge_audio_video.py index a8d6d2e..ffdfbf9 100644 --- a/src/merge_audio_video.py +++ b/src/merge_audio_video.py @@ -6,10 +6,11 @@ from __future__ import annotations import argparse import logging import re +import shutil import subprocess from pathlib import Path -from logging_config import configure_logging, debug_log_lifecycle +from logging_config import configure_logging SCRIPT_DIR = Path(__file__).resolve().parent @@ -32,14 +33,14 @@ def parse_args() -> argparse.Namespace: parser.add_argument("--audios-dir", type=Path, default=DEFAULT_AUDIOS_DIR) parser.add_argument("--output-dir", type=Path, default=DEFAULT_OUTPUT_DIR) parser.add_argument( - "--log-level", - default=None, - help="Logging level (overrides LOG_LEVEL env var)", + "--allow-missing-audio", + action="store_true", + help="If set, create merged output from video only when audio is missing.", ) + parser.add_argument("--log-level", default="INFO") return parser.parse_args() -@debug_log_lifecycle def main() -> int: args = parse_args() configure_logging(args.log_level) @@ -55,14 +56,24 @@ def main() -> int: audio = args.audios_dir / f"output_{num}.mp3" output = args.output_dir / f"merged_{num}.mp4" - if not audio.exists(): - LOGGER.warning("No audio found for shot %s (%s); skipped", num, audio) - continue - if output.exists(): LOGGER.info("Already exists; skipped shot %s", num) continue + if not audio.exists(): + if args.allow_missing_audio: + LOGGER.warning( + "No audio found for shot %s (%s); using video-only output", + num, + audio, + ) + shutil.copy2(video, output) + LOGGER.info("Done (video-only): %s", output) + continue + + LOGGER.warning("No audio found for shot %s (%s); skipped", num, audio) + continue + LOGGER.info("Merging shot %s: %s + %s -> %s", num, video, audio, output) subprocess.run( [