diff --git a/README.md b/README.md
index 79fa316..1b842b0 100644
--- a/README.md
+++ b/README.md
@@ -24,7 +24,10 @@ Docker (GPU):
 ```bash
 cp .env.example .env
 docker build -t content-generation:latest .
-docker run --rm --gpus all --env-file .env -v "$(pwd)":/app -w /app content-generation:latest
+docker run --rm --gpus all --env-file .env \
+  -v "$(pwd)":/app \
+  -v "$HOME/.cache/huggingface":/root/.cache/huggingface \
+  -w /app content-generation:latest
 ```
 
 First run (skip S3 upload):
@@ -36,7 +39,11 @@ python run_video_pipeline.py --skip-s3-upload
 Docker first run (skip S3 upload):
 
 ```bash
-docker run --rm --gpus all --env-file .env -v "$(pwd)":/app -w /app content-generation:latest \
+docker run --rm --gpus all --env-file .env \
+  -v "$(pwd)":/app \
+  -v "$HOME/.cache/huggingface":/root/.cache/huggingface \
+  -w /app \
+  content-generation:latest \
   python run_video_pipeline.py --skip-s3-upload
 ```
 
@@ -136,6 +143,7 @@ docker build -t content-generation:latest --build-arg INSTALL_OPTIONAL_ATTENTION
 docker run --rm --gpus all \
   --env-file .env \
   -v "$(pwd)":/app \
+  -v "$HOME/.cache/huggingface":/root/.cache/huggingface \
   -w /app \
   content-generation:latest
 ```
@@ -146,6 +154,7 @@ docker run --rm --gpus all \
 docker run --rm --gpus all \
   --env-file .env \
   -v "$(pwd)":/app \
+  -v "$HOME/.cache/huggingface":/root/.cache/huggingface \
   -w /app \
   content-generation:latest \
   python run_video_pipeline.py --skip-s3-upload --log-level DEBUG
diff --git a/run_video_pipeline.py b/run_video_pipeline.py
index 6e0bc25..6155997 100644
--- a/run_video_pipeline.py
+++ b/run_video_pipeline.py
@@ -40,6 +40,7 @@ def parse_args() -> argparse.Namespace:
     parser.add_argument("--output", type=Path, default=DEFAULT_OUTPUT)
     parser.add_argument("--seed", type=int, default=1)
     parser.add_argument("--skip-generate", action="store_true")
+    parser.add_argument("--skip-audio-generate", action="store_true")
     parser.add_argument("--skip-merge", action="store_true")
     parser.add_argument("--skip-concat", action="store_true")
     parser.add_argument("--skip-s3-upload", action="store_true")
@@ -123,6 +124,16 @@ def main() -> int:
                 LOGGER.error("Reel script was not generated at %s", args.reel_script)
                 return 1
 
+        if not args.skip_generate and not args.skip_audio_generate:
+            run_step(
+                "Generate Audios",
+                _with_log_level([
+                    sys.executable,
+                    str(SCRIPT_DIR / "generate_audios.py"),
+                ], args.log_level),
+                cwd=args.base_dir,
+            )
+
         if not args.skip_generate:
             run_step(
                 "Generate Videos",
@@ -145,18 +156,22 @@ def main() -> int:
             )
 
         if not args.skip_merge:
+            merge_cmd = [
+                sys.executable,
+                str(SCRIPT_DIR / "merge_audio_video.py"),
+                "--videos-dir",
+                str(args.videos_dir),
+                "--audios-dir",
+                str(args.audios_dir),
+                "--output-dir",
+                str(args.merged_dir),
+            ]
+            if args.skip_audio_generate:
+                merge_cmd.append("--allow-missing-audio")
+
             run_step(
                 "Merge Audio + Video",
-                _with_log_level([
-                    sys.executable,
-                    str(SCRIPT_DIR / "merge_audio_video.py"),
-                    "--videos-dir",
-                    str(args.videos_dir),
-                    "--audios-dir",
-                    str(args.audios_dir),
-                    "--output-dir",
-                    str(args.merged_dir),
-                ], args.log_level),
+                _with_log_level(merge_cmd, args.log_level),
             )
 
         if not args.skip_concat:
diff --git a/src/merge_audio_video.py b/src/merge_audio_video.py
index a8d6d2e..ffdfbf9 100644
--- a/src/merge_audio_video.py
+++ b/src/merge_audio_video.py
@@ -6,10 +6,11 @@ from __future__ import annotations
 import argparse
 import logging
 import re
+import shutil
 import subprocess
 from pathlib import Path
 
-from logging_config import configure_logging, debug_log_lifecycle
+from logging_config import configure_logging
 
 
 SCRIPT_DIR = Path(__file__).resolve().parent
@@ -32,14 +33,14 @@ def parse_args() -> argparse.Namespace:
     parser.add_argument("--audios-dir", type=Path, default=DEFAULT_AUDIOS_DIR)
     parser.add_argument("--output-dir", type=Path, default=DEFAULT_OUTPUT_DIR)
     parser.add_argument(
-        "--log-level",
-        default=None,
-        help="Logging level (overrides LOG_LEVEL env var)",
+        "--allow-missing-audio",
+        action="store_true",
+        help="If set, create merged output from video only when audio is missing.",
     )
+    parser.add_argument("--log-level", default=None, help="Logging level (overrides LOG_LEVEL env var)")
     return parser.parse_args()
 
 
-@debug_log_lifecycle
 def main() -> int:
     args = parse_args()
     configure_logging(args.log_level)
@@ -55,14 +56,24 @@ def main() -> int:
         audio = args.audios_dir / f"output_{num}.mp3"
         output = args.output_dir / f"merged_{num}.mp4"
 
-        if not audio.exists():
-            LOGGER.warning("No audio found for shot %s (%s); skipped", num, audio)
-            continue
-
         if output.exists():
             LOGGER.info("Already exists; skipped shot %s", num)
             continue
 
+        if not audio.exists():
+            if args.allow_missing_audio:
+                LOGGER.warning(
+                    "No audio found for shot %s (%s); using video-only output",
+                    num,
+                    audio,
+                )
+                shutil.copy2(video, output)
+                LOGGER.info("Done (video-only): %s", output)
+                continue
+
+            LOGGER.warning("No audio found for shot %s (%s); skipped", num, audio)
+            continue
+
         LOGGER.info("Merging shot %s: %s + %s -> %s", num, video, audio, output)
         subprocess.run(
             [