1
0

Refactored code, added Dockerfile, replaced bash scripts with python alternatives, added README with instructions on running a pipeline

This commit is contained in:
2026-04-01 16:56:06 +02:00
parent ca116562fe
commit 686a458905
19 changed files with 1103 additions and 65 deletions

View File

@@ -42,6 +42,7 @@ Thumbs.db
# Local env and logs # Local env and logs
.env .env
.env.* .env.*
!.env.example
*.log *.log
*.pid *.pid
@@ -50,3 +51,13 @@ Thumbs.db
*.mov *.mov
*.avi *.avi
*.mkv *.mkv
# Project generated data and checkpoints
images/
audios/
videos/
merged/
results/
outputs/
ckpts/
HunyuanVideo-1.5/ckpts/

1
.env
View File

@@ -1 +0,0 @@
ELEVENLABS_API_KEY=[REDACTED — a live secret was committed here; deleting the file does not purge it from git history, so this key must be rotated]

19
.env.example Normal file
View File

@@ -0,0 +1,19 @@
# ElevenLabs
ELEVENLABS_API_KEY=
# Hugging Face (required for gated model downloads, e.g. FLUX.1-schnell)
HUGGINGFACE_HUB_TOKEN=
# Hunyuan prompt rewrite endpoints (optional; rewrite is disabled in current generate_videos.py)
T2V_REWRITE_BASE_URL=
T2V_REWRITE_MODEL_NAME=
I2V_REWRITE_BASE_URL=
I2V_REWRITE_MODEL_NAME=
# AWS / S3 (used when initializing S3VideoStorage)
AWS_ACCESS_KEY_ID=
AWS_SECRET_ACCESS_KEY=
AWS_SESSION_TOKEN=
AWS_REGION=
AWS_S3_BUCKET=
AWS_S3_ENDPOINT_URL=

1
.gitignore vendored
View File

@@ -58,6 +58,7 @@ Thumbs.db
# Local environment variables # Local environment variables
.env .env
.env.* .env.*
!.env.example
# Project-specific artifacts # Project-specific artifacts
*.mp4 *.mp4

66
Dockerfile Normal file
View File

@@ -0,0 +1,66 @@
# syntax=docker/dockerfile:1
FROM nvidia/cuda:12.4.1-cudnn-devel-ubuntu22.04

# NOTE(review): the image runs as root; acceptable for a disposable GPU
# pipeline container, but add a non-root USER before any production use.
ENV PYTHONUNBUFFERED=1 \
    PIP_NO_CACHE_DIR=1 \
    PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True,max_split_size_mb:128

# Base OS tools + media stack + Python toolchain.
# DEBIAN_FRONTEND is scoped to this RUN only so it does not leak into the
# runtime environment of the final image (Docker best practice).
RUN DEBIAN_FRONTEND=noninteractive apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    ca-certificates \
    curl \
    ffmpeg \
    git \
    git-lfs \
    libgl1 \
    libglib2.0-0 \
    ninja-build \
    pkg-config \
    python3.10 \
    python3.10-dev \
    python3.10-venv \
    python3-pip \
    && rm -rf /var/lib/apt/lists/* \
    && ln -sf /usr/bin/python3.10 /usr/bin/python \
    && ln -sf /usr/bin/pip3 /usr/bin/pip \
    && git lfs install

WORKDIR /app

# Install project Python dependencies first for better layer caching.
# cu121 wheels are minor-version compatible with the CUDA 12.4 runtime.
COPY requirements.txt /app/requirements.txt
RUN python -m pip install --upgrade pip setuptools wheel \
    && pip install --index-url https://download.pytorch.org/whl/cu121 torch torchvision torchaudio \
    && pip install -r /app/requirements.txt \
    && pip install -U accelerate safetensors

# Copy project code.
COPY . /app

# Ensure HunyuanVideo source exists in the image; clone upstream if the
# build context did not ship it.
ARG HUNYUAN_REPO=https://github.com/Tencent-Hunyuan/HunyuanVideo-1.5.git
RUN if [ ! -f /app/HunyuanVideo-1.5/requirements.txt ]; then \
        rm -rf /app/HunyuanVideo-1.5 && \
        git clone --depth 1 "$HUNYUAN_REPO" /app/HunyuanVideo-1.5; \
    fi

# Install HunyuanVideo dependencies from upstream README guidance.
RUN pip install -r /app/HunyuanVideo-1.5/requirements.txt \
    && pip install --upgrade tencentcloud-sdk-python \
    && pip install sgl-kernel==0.3.18

# Optional attention backends from Hunyuan docs.
# Build with: --build-arg INSTALL_OPTIONAL_ATTENTION=1
ARG INSTALL_OPTIONAL_ATTENTION=0
RUN if [ "$INSTALL_OPTIONAL_ATTENTION" = "1" ]; then \
        pip install flash-attn --no-build-isolation && \
        git clone --depth 1 https://github.com/Tencent-Hunyuan/flex-block-attn.git /tmp/flex-block-attn && \
        cd /tmp/flex-block-attn && git submodule update --init --recursive && python setup.py install && \
        git clone --depth 1 https://github.com/cooper1637/SageAttention.git /tmp/SageAttention && \
        cd /tmp/SageAttention && python setup.py install; \
    fi

# Default pipeline entrypoint (exec form so the process receives SIGTERM).
CMD ["python", "run_video_pipeline.py"]

202
README.md Normal file
View File

@@ -0,0 +1,202 @@
# ContentGeneration Pipeline
This project runs a 3-step video pipeline:
1. Generate shot videos from images + prompts.
2. Merge each generated video with its audio.
3. Concatenate merged clips into one final output.
The pipeline entrypoint is `run_video_pipeline.py`.
## Quick Start
Local Python:
```bash
cp .env.example .env
python3 -m venv .venv && source .venv/bin/activate
pip install -r requirements.txt
python run_video_pipeline.py
```
Docker (GPU):
```bash
cp .env.example .env
docker build -t content-generation:latest .
docker run --rm --gpus all --env-file .env -v "$(pwd)":/app -w /app content-generation:latest
```
First run (skip S3 upload):
```bash
python run_video_pipeline.py --skip-s3-upload
```
Docker first run (skip S3 upload):
```bash
docker run --rm --gpus all --env-file .env -v "$(pwd)":/app -w /app content-generation:latest \
python run_video_pipeline.py --skip-s3-upload
```
## Project Layout
- `run_video_pipeline.py`: main entrypoint.
- `src/scripts/`: helper scripts used by the pipeline.
- `HunyuanVideo-1.5/`: Hunyuan inference code and model dependencies.
- `reel_script.json`: required script input with `shots`.
- `images/`, `audios/`, `videos/`, `merged/`, `results/`: working/output folders.
- `.env.example`: environment variable template.
## Prerequisites
1. Linux with NVIDIA GPU and CUDA runtime.
2. `ffmpeg` and `ffprobe` available on PATH.
3. Python 3.10+.
4. Hunyuan model checkpoints under `HunyuanVideo-1.5/ckpts`.
5. If using FLUX local download, access approved for `black-forest-labs/FLUX.1-schnell`.
## Environment Variables
1. Create local env file:
```bash
cp .env.example .env
```
2. Fill required variables in `.env`:
- `ELEVENLABS_API_KEY` for audio generation.
- `HUGGINGFACE_HUB_TOKEN` if gated Hugging Face model access is needed.
- `AWS_S3_BUCKET` (+ optional AWS vars) if you want final output uploaded to S3.
## Run Locally (Python)
1. Create and activate a virtual environment:
```bash
python3 -m venv .venv
source .venv/bin/activate
```
2. Install Python dependencies:
```bash
python -m pip install --upgrade pip
pip install -r requirements.txt
```
3. Install Hunyuan dependencies:
```bash
pip install -r HunyuanVideo-1.5/requirements.txt
pip install --upgrade tencentcloud-sdk-python
pip install sgl-kernel==0.3.18
```
4. Run full pipeline:
```bash
python run_video_pipeline.py
```
5. Common options:
```bash
# Skip generation and only merge + concat
python run_video_pipeline.py --skip-generate
# Skip S3 upload
python run_video_pipeline.py --skip-s3-upload
# Override base directory
python run_video_pipeline.py --base-dir /absolute/path/to/workdir
# Change logging verbosity
python run_video_pipeline.py --log-level DEBUG
```
## Run with Docker
1. Build image:
```bash
docker build -t content-generation:latest .
```
2. Optional build with extra attention backends:
```bash
docker build -t content-generation:latest --build-arg INSTALL_OPTIONAL_ATTENTION=1 .
```
3. Run pipeline in container (GPU required):
```bash
docker run --rm --gpus all \
--env-file .env \
-v "$(pwd)":/app \
-w /app \
content-generation:latest
```
4. Pass extra pipeline args:
```bash
docker run --rm --gpus all \
--env-file .env \
-v "$(pwd)":/app \
-w /app \
content-generation:latest \
python run_video_pipeline.py --skip-s3-upload --log-level DEBUG
```
## Input Expectations
1. `reel_script.json` must exist and contain a `shots` array.
2. `images/shot_<n>.png` and `audios/output_<n>.mp3` should align by shot number.
3. Final output is written by default to `results/final_output.mp4`.
## S3 Upload Behavior
1. If `AWS_S3_BUCKET` is set, the pipeline uploads final output to S3 using `S3VideoStorage`.
2. If `AWS_S3_BUCKET` is missing, upload is skipped with a warning.
3. Disable upload explicitly with `--skip-s3-upload`.
## Troubleshooting
1. `torch.cuda.is_available()` is false in Docker.
- Run with GPU flags: `docker run --gpus all ...`
- Verify NVIDIA Container Toolkit is installed on host.
- Check host GPU visibility: `nvidia-smi`.
2. `ffmpeg` or `ffprobe` not found.
- Local: install ffmpeg with your package manager.
- Docker: ffmpeg is installed in the provided Dockerfile.
3. Hunyuan generate step fails due to missing checkpoints.
- Ensure checkpoints are available under `HunyuanVideo-1.5/ckpts`.
- Confirm mounted project path in Docker includes checkpoints.
4. Hugging Face model download fails (401/403).
- Accept model access terms for gated models (for example FLUX.1-schnell).
- Set `HUGGINGFACE_HUB_TOKEN` in `.env`.
5. S3 upload fails.
- Confirm `AWS_S3_BUCKET` is set.
- If needed, set `AWS_REGION` and credentials (`AWS_ACCESS_KEY_ID`, `AWS_SECRET_ACCESS_KEY`, optional `AWS_SESSION_TOKEN`).
- For S3-compatible providers, set `AWS_S3_ENDPOINT_URL`.
6. Permission issues when running Docker with mounted volumes.
- Use your host user mapping if needed:
`docker run --rm --gpus all -u "$(id -u):$(id -g)" ...`
7. Out-of-memory during video generation.
- Keep `PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True,max_split_size_mb:128`.
- Reduce workload by skipping optional enhancements or lowering resolution/steps in generation scripts.
8. Verify syntax quickly before running.
```bash
python3 -m py_compile run_video_pipeline.py src/scripts/*.py
```

View File

@@ -1,35 +0,0 @@
from elevenlabs.client import ElevenLabs
from elevenlabs.play import play
import os
import json
from dotenv import load_dotenv

load_dotenv()

ELEVENLABS_API_KEY = os.getenv('ELEVENLABS_API_KEY')

if __name__ == '__main__':
    # Read the reel script and synthesise one MP3 voiceover per shot.
    script_path = "reel_script.json"
    with open(script_path, "r") as f:
        reel_data = json.load(f)
    client = ElevenLabs(
        api_key=ELEVENLABS_API_KEY
    )
    for shot in reel_data["shots"]:
        # BUG FIX: the original interpolated shot["shot_number"] with nested
        # double quotes inside an f-string, which is a SyntaxError on
        # Python < 3.12 (the project targets 3.10). Hoist to a variable.
        shot_num = shot["shot_number"]
        print(shot_num, shot["voiceover"])
        prompt = shot["voiceover"]
        audio = client.text_to_speech.convert(
            text=prompt,
            voice_id="JBFqnCBsd6RMkjVDRZzb",
            model_id="eleven_multilingual_v2",
            output_format="mp3_44100_128",
        )
        # convert() yields audio chunks; join into a single payload.
        audio_bytes = b"".join(audio)
        os.makedirs("audios", exist_ok=True)
        with open(f"audios/output_{shot_num}.mp3", "wb") as out:
            out.write(audio_bytes)

View File

@@ -1,28 +0,0 @@
import torch
from diffusers import FluxPipeline
import json
import os

if __name__ == '__main__':
    # Read the reel script and render one FLUX.1-schnell image per shot.
    script_path = "reel_script.json"
    with open(script_path, "r") as f:
        reel_data = json.load(f)
    pipe = FluxPipeline.from_pretrained("black-forest-labs/FLUX.1-schnell", torch_dtype=torch.bfloat16)
    # CPU offload keeps VRAM usage low at the cost of throughput.
    pipe.enable_model_cpu_offload()
    for shot in reel_data["shots"]:
        # BUG FIX: the original interpolated shot["shot_number"] with nested
        # double quotes inside an f-string, which is a SyntaxError on
        # Python < 3.12 (the project targets 3.10). Hoist to a variable.
        shot_num = shot["shot_number"]
        print(shot_num, shot["image_description"])
        prompt = shot["image_description"]
        image = pipe(
            prompt,
            guidance_scale=0.0,
            num_inference_steps=4,
            max_sequence_length=256,
            generator=torch.Generator("cpu").manual_seed(0)
        ).images[0]
        os.makedirs("images", exist_ok=True)
        image.save(f"images/shot_{shot_num}.png")

17
requirements.txt Normal file
View File

@@ -0,0 +1,17 @@
# Core project dependencies inferred from imports in this workspace
boto3
python-dotenv
elevenlabs
torch
transformers
diffusers
accelerate
safetensors
huggingface-hub
# Optional but commonly required for 4-bit quantization with BitsAndBytesConfig
bitsandbytes
# Notes:
# - ffmpeg/ffprobe are required by video scripts but installed at OS level, not via pip.
# - torchrun is provided by the torch package.

163
run_video_pipeline.py Normal file
View File

@@ -0,0 +1,163 @@
#!/usr/bin/env python3
"""Run the full video pipeline: generate, merge, and concatenate."""
from __future__ import annotations
import argparse
import logging
import os
import subprocess
import sys
from pathlib import Path
from src.scripts.logging_config import configure_logging
from src.scripts.s3_video_storage import S3VideoStorage
PROJECT_ROOT = Path(__file__).resolve().parent
SCRIPT_DIR = PROJECT_ROOT / "src" / "scripts"
DEFAULT_BASE_DIR = PROJECT_ROOT
DEFAULT_HUNYUAN_DIR = DEFAULT_BASE_DIR / "HunyuanVideo-1.5"
DEFAULT_REEL_SCRIPT = DEFAULT_BASE_DIR / "reel_script.json"
DEFAULT_IMAGES_DIR = DEFAULT_BASE_DIR / "images"
DEFAULT_VIDEOS_DIR = DEFAULT_BASE_DIR / "videos"
DEFAULT_AUDIOS_DIR = DEFAULT_BASE_DIR / "audios"
DEFAULT_MERGED_DIR = DEFAULT_BASE_DIR / "merged"
DEFAULT_OUTPUT = DEFAULT_BASE_DIR / "results" / "final_output.mp4"
LOGGER = logging.getLogger(__name__)
def parse_args() -> argparse.Namespace:
    """Define and parse all pipeline CLI options."""
    cli = argparse.ArgumentParser(description=__doc__)
    # Path overrides; sub-paths are re-derived from --base-dir in main().
    cli.add_argument("--base-dir", type=Path, default=DEFAULT_BASE_DIR)
    cli.add_argument("--hunyuan-dir", type=Path, default=DEFAULT_HUNYUAN_DIR)
    cli.add_argument("--reel-script", type=Path, default=DEFAULT_REEL_SCRIPT)
    cli.add_argument("--images-dir", type=Path, default=DEFAULT_IMAGES_DIR)
    cli.add_argument("--videos-dir", type=Path, default=DEFAULT_VIDEOS_DIR)
    cli.add_argument("--audios-dir", type=Path, default=DEFAULT_AUDIOS_DIR)
    cli.add_argument("--merged-dir", type=Path, default=DEFAULT_MERGED_DIR)
    cli.add_argument("--output", type=Path, default=DEFAULT_OUTPUT)
    cli.add_argument("--seed", type=int, default=1)
    # Stage toggles for partial runs.
    cli.add_argument("--skip-generate", action="store_true")
    cli.add_argument("--skip-merge", action="store_true")
    cli.add_argument("--skip-concat", action="store_true")
    cli.add_argument("--skip-s3-upload", action="store_true")
    cli.add_argument("--log-level", default="INFO")
    return cli.parse_args()
def run_step(name: str, cmd: list[str]) -> None:
    """Log and execute one pipeline stage; raises CalledProcessError on failure."""
    LOGGER.info("=== %s ===", name)
    rendered = " ".join(str(part) for part in cmd)
    LOGGER.info("$ %s", rendered)
    subprocess.run(cmd, check=True)
def maybe_upload_to_s3(output_path: Path) -> None:
    """Upload the final video to S3 when AWS_S3_BUCKET is configured.

    Skips with a warning (rather than failing) when the bucket is unset, so
    local runs work without AWS credentials.
    """
    bucket = os.getenv("AWS_S3_BUCKET")
    if not bucket:
        LOGGER.warning("Skipping S3 upload: AWS_S3_BUCKET is not set")
        return
    # Unset optional vars resolve to None — presumably S3VideoStorage/boto3
    # fall back to their defaults; TODO confirm against its constructor.
    storage = S3VideoStorage(
        {
            "bucket_name": bucket,
            "region_name": os.getenv("AWS_REGION"),
            "endpoint_url": os.getenv("AWS_S3_ENDPOINT_URL"),
            "aws_access_key_id": os.getenv("AWS_ACCESS_KEY_ID"),
            "aws_secret_access_key": os.getenv("AWS_SECRET_ACCESS_KEY"),
            "aws_session_token": os.getenv("AWS_SESSION_TOKEN"),
        }
    )
    s3_uri = storage.store_file(output_path)
    LOGGER.info("Uploaded output to %s", s3_uri)
def main() -> int:
    """Run generate -> merge -> concat, then optionally upload to S3.

    Returns a process exit code: 0 on success, the failing subprocess's
    returncode if a pipeline stage fails, 1 if the S3 upload fails.
    """
    args = parse_args()
    configure_logging(args.log_level)
    # If only base-dir is overridden, derive the common subpaths from it.
    if args.base_dir != DEFAULT_BASE_DIR:
        if args.hunyuan_dir == DEFAULT_HUNYUAN_DIR:
            args.hunyuan_dir = args.base_dir / "HunyuanVideo-1.5"
        if args.reel_script == DEFAULT_REEL_SCRIPT:
            args.reel_script = args.base_dir / "reel_script.json"
        if args.images_dir == DEFAULT_IMAGES_DIR:
            args.images_dir = args.base_dir / "images"
        if args.videos_dir == DEFAULT_VIDEOS_DIR:
            args.videos_dir = args.base_dir / "videos"
        if args.audios_dir == DEFAULT_AUDIOS_DIR:
            args.audios_dir = args.base_dir / "audios"
        if args.merged_dir == DEFAULT_MERGED_DIR:
            args.merged_dir = args.base_dir / "merged"
        if args.output == DEFAULT_OUTPUT:
            args.output = args.base_dir / "results" / "final_output.mp4"
    try:
        # Each stage is a child process so heavy GPU state is released
        # between steps; args are forwarded explicitly as CLI flags.
        if not args.skip_generate:
            run_step(
                "Generate Videos",
                [
                    sys.executable,
                    str(SCRIPT_DIR / "generate_videos.py"),
                    "--hunyuan-dir",
                    str(args.hunyuan_dir),
                    "--reel-script",
                    str(args.reel_script),
                    "--images-dir",
                    str(args.images_dir),
                    "--videos-dir",
                    str(args.videos_dir),
                    "--audios-dir",
                    str(args.audios_dir),
                    "--seed",
                    str(args.seed),
                ],
            )
        if not args.skip_merge:
            run_step(
                "Merge Audio + Video",
                [
                    sys.executable,
                    str(SCRIPT_DIR / "merge_audio_video.py"),
                    "--videos-dir",
                    str(args.videos_dir),
                    "--audios-dir",
                    str(args.audios_dir),
                    "--output-dir",
                    str(args.merged_dir),
                ],
            )
        if not args.skip_concat:
            run_step(
                "Concatenate Merged Videos",
                [
                    sys.executable,
                    str(SCRIPT_DIR / "concat_merged.py"),
                    "--merged-dir",
                    str(args.merged_dir),
                    "--output",
                    str(args.output),
                ],
            )
    except subprocess.CalledProcessError as exc:
        # Propagate the failing stage's exit code to the caller/CI.
        LOGGER.exception("Pipeline failed at command: %s", exc.cmd)
        return exc.returncode
    if not args.skip_s3_upload:
        try:
            maybe_upload_to_s3(args.output)
        except Exception:
            # The video was still produced locally; signal the upload
            # failure via exit code 1 without re-raising.
            LOGGER.exception("Failed uploading output to S3")
            return 1
    LOGGER.info("Pipeline complete")
    LOGGER.info("Final output: %s", args.output)
    return 0
if __name__ == "__main__":
raise SystemExit(main())

View File

@@ -0,0 +1,80 @@
#!/usr/bin/env python3
"""Concatenate merged_*.mp4 files into a single output using ffmpeg concat demuxer."""
from __future__ import annotations
import argparse
import logging
import re
import subprocess
import tempfile
from pathlib import Path
from logging_config import configure_logging
SCRIPT_DIR = Path(__file__).resolve().parent
DEFAULT_BASE_DIR = SCRIPT_DIR.parents[1]
DEFAULT_MERGED_DIR = DEFAULT_BASE_DIR / "merged"
DEFAULT_OUTPUT = DEFAULT_BASE_DIR / "results" / "run_3" / "final_output.mp4"
LOGGER = logging.getLogger(__name__)
def shot_number(path: Path) -> int:
    """Extract the numeric index from a merged_<n>.mp4 filename (-1 if absent)."""
    found = re.search(r"merged_(\d+)\.mp4$", path.name)
    if found is None:
        return -1
    return int(found.group(1))
def parse_args() -> argparse.Namespace:
    """Build and parse CLI options for the concat step."""
    cli = argparse.ArgumentParser(description=__doc__)
    cli.add_argument("--merged-dir", type=Path, default=DEFAULT_MERGED_DIR)
    cli.add_argument("--output", type=Path, default=DEFAULT_OUTPUT)
    cli.add_argument("--log-level", default="INFO")
    return cli.parse_args()
def main() -> int:
    """Concatenate merged clips, in shot order, into one output video.

    Returns 0 on success, 1 when no merged clips are found.
    Raises subprocess.CalledProcessError if ffmpeg fails.
    """
    args = parse_args()
    configure_logging(args.log_level)
    videos = sorted(args.merged_dir.glob("merged_*.mp4"), key=shot_number)
    if not videos:
        LOGGER.warning("No merged videos found in %s", args.merged_dir)
        return 1
    args.output.parent.mkdir(parents=True, exist_ok=True)
    # The concat demuxer needs a list file with one "file '<path>'" per line.
    with tempfile.NamedTemporaryFile(mode="w", suffix=".txt", delete=False) as tmp:
        filelist = Path(tmp.name)
        for video in videos:
            # BUG FIX: the original wrote a literal backslash-n ("\\n"),
            # putting every entry on a single line and producing a malformed
            # concat list; write a real newline instead.
            tmp.write(f"file '{video}'\n")
    try:
        LOGGER.info("Concatenating the following files:\n%s", filelist.read_text().rstrip())
        # Stream copy (-c copy): no re-encode, inputs must share codecs.
        subprocess.run(
            [
                "ffmpeg",
                "-f",
                "concat",
                "-safe",
                "0",
                "-i",
                str(filelist),
                "-c",
                "copy",
                "-y",
                str(args.output),
            ],
            check=True,
        )
    finally:
        # Always remove the temp list, even if ffmpeg fails.
        filelist.unlink(missing_ok=True)
    LOGGER.info("Done")
    return 0
if __name__ == "__main__":
raise SystemExit(main())

View File

@@ -0,0 +1,54 @@
from __future__ import annotations
import json
import logging
import os
from pathlib import Path
from dotenv import load_dotenv
from elevenlabs.client import ElevenLabs
from logging_config import configure_logging
SCRIPT_DIR = Path(__file__).resolve().parent
PROJECT_ROOT = SCRIPT_DIR.parents[1]
load_dotenv(PROJECT_ROOT / ".env")
LOGGER = logging.getLogger(__name__)
def main() -> int:
    """Synthesise one ElevenLabs voiceover MP3 per shot in reel_script.json."""
    configure_logging("INFO")
    api_key = os.getenv("ELEVENLABS_API_KEY")
    if not api_key:
        raise RuntimeError("ELEVENLABS_API_KEY is not set")
    reel_script = PROJECT_ROOT / "reel_script.json"
    audios_dir = PROJECT_ROOT / "audios"
    audios_dir.mkdir(parents=True, exist_ok=True)
    shots = json.loads(reel_script.read_text())["shots"]
    client = ElevenLabs(api_key=api_key)
    for shot in shots:
        shot_num = shot["shot_number"]
        prompt = shot["voiceover"]
        LOGGER.info("Generating audio for shot %s: %s", shot_num, prompt)
        # convert() yields audio chunks; join them into a single MP3 payload.
        stream = client.text_to_speech.convert(
            text=prompt,
            voice_id="JBFqnCBsd6RMkjVDRZzb",
            model_id="eleven_multilingual_v2",
            output_format="mp3_44100_128",
        )
        (audios_dir / f"output_{shot_num}.mp3").write_bytes(b"".join(stream))
    return 0
if __name__ == "__main__":
raise SystemExit(main())

View File

@@ -0,0 +1,50 @@
from __future__ import annotations
import json
import logging
from pathlib import Path
import torch
from diffusers import FluxPipeline
from logging_config import configure_logging
SCRIPT_DIR = Path(__file__).resolve().parent
PROJECT_ROOT = SCRIPT_DIR.parents[1]
LOGGER = logging.getLogger(__name__)
def main() -> int:
    """Render one FLUX.1-schnell image per shot described in reel_script.json."""
    configure_logging("INFO")
    reel_script = PROJECT_ROOT / "reel_script.json"
    images_dir = PROJECT_ROOT / "images"
    images_dir.mkdir(parents=True, exist_ok=True)
    shots = json.loads(reel_script.read_text())["shots"]
    pipe = FluxPipeline.from_pretrained(
        "black-forest-labs/FLUX.1-schnell",
        torch_dtype=torch.bfloat16,
    )
    # CPU offload keeps VRAM usage low at the cost of throughput.
    pipe.enable_model_cpu_offload()
    for shot in shots:
        shot_num = shot["shot_number"]
        prompt = shot["image_description"]
        LOGGER.info("Generating image for shot %s: %s", shot_num, prompt)
        # Fixed CPU seed keeps outputs reproducible across runs.
        result = pipe(
            prompt,
            guidance_scale=0.0,
            num_inference_steps=4,
            max_sequence_length=256,
            generator=torch.Generator("cpu").manual_seed(0),
        )
        result.images[0].save(images_dir / f"shot_{shot_num}.png")
    return 0
if __name__ == "__main__":
raise SystemExit(main())

View File

@@ -1,9 +1,15 @@
import torch import torch
import json import json
import logging
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
import re import re
from typing import Optional from typing import Optional
from logging_config import configure_logging
LOGGER = logging.getLogger(__name__)
device = 'cuda' if torch.cuda.is_available() else 'cpu' device = 'cuda' if torch.cuda.is_available() else 'cpu'
MODEL_ID = "Qwen/Qwen3-14B" MODEL_ID = "Qwen/Qwen3-14B"
WORDS_PER_SECOND = 2.5 WORDS_PER_SECOND = 2.5
@@ -174,7 +180,7 @@ def generate_reel_scenario(
inputs = tokenizer(text, return_tensors="pt").to(model.device) inputs = tokenizer(text, return_tensors="pt").to(model.device)
print("Generating reel scenario..") LOGGER.info("Generating reel scenario")
with torch.no_grad(): with torch.no_grad():
output_ids = model.generate( output_ids = model.generate(
**inputs, **inputs,
@@ -330,6 +336,7 @@ def parse_reel_scenario(raw_scenario: str) -> dict:
if __name__ == '__main__': if __name__ == '__main__':
configure_logging("INFO")
with open("topic_description.txt", "r") as f: with open("topic_description.txt", "r") as f:
topic = f.read() topic = f.read()

View File

@@ -0,0 +1,171 @@
#!/usr/bin/env python3
"""Generate shot videos with HunyuanVideo based on reel script and audio durations."""
from __future__ import annotations
import argparse
import json
import logging
import os
import subprocess
from pathlib import Path
from logging_config import configure_logging
SCRIPT_DIR = Path(__file__).resolve().parent
DEFAULT_BASE_DIR = SCRIPT_DIR.parents[1]
DEFAULT_HUNYUAN_DIR = DEFAULT_BASE_DIR / "HunyuanVideo-1.5"
DEFAULT_REEL_SCRIPT = DEFAULT_BASE_DIR / "reel_script.json"
DEFAULT_IMAGES_DIR = DEFAULT_BASE_DIR / "images"
DEFAULT_VIDEOS_DIR = DEFAULT_BASE_DIR / "videos"
DEFAULT_AUDIOS_DIR = DEFAULT_BASE_DIR / "audios"
LOGGER = logging.getLogger(__name__)
def parse_args() -> argparse.Namespace:
    """Parse CLI options for the Hunyuan video-generation step."""
    cli = argparse.ArgumentParser(description=__doc__)
    cli.add_argument("--hunyuan-dir", type=Path, default=DEFAULT_HUNYUAN_DIR)
    cli.add_argument("--reel-script", type=Path, default=DEFAULT_REEL_SCRIPT)
    cli.add_argument("--images-dir", type=Path, default=DEFAULT_IMAGES_DIR)
    cli.add_argument("--videos-dir", type=Path, default=DEFAULT_VIDEOS_DIR)
    cli.add_argument("--audios-dir", type=Path, default=DEFAULT_AUDIOS_DIR)
    cli.add_argument("--seed", type=int, default=1)
    cli.add_argument("--log-level", default="INFO")
    return cli.parse_args()
def get_audio_duration(audio_path: Path) -> float:
    """Return the duration of an audio file in seconds, as reported by ffprobe.

    Raises subprocess.CalledProcessError if ffprobe fails, ValueError if its
    output is not parseable as a float.
    """
    probe_cmd = [
        "ffprobe",
        "-v",
        "error",
        "-show_entries",
        "format=duration",
        "-of",
        "default=noprint_wrappers=1:nokey=1",
        str(audio_path),
    ]
    probe = subprocess.run(probe_cmd, check=True, text=True, capture_output=True)
    return float(probe.stdout.strip())
def duration_to_video_length(duration: float, fps: int = 24) -> int:
    """Convert an audio duration (seconds) into a frame count for Hunyuan.

    Generalized: the previously hard-coded 24 fps is now a parameter with the
    same default, so callers are unaffected.

    Args:
        duration: clip length in seconds.
        fps: frames per second assumed by the generation step (default 24).

    Returns:
        An odd frame count clamped to [49, 169]. Both bounds are odd, so
        clamping preserves the odd-length requirement.
    """
    frames = int(duration * fps) + 1
    if frames % 2 == 0:
        frames += 1  # video_length must be odd
    return max(49, min(frames, 169))
def main() -> int:
    """Drive Hunyuan i2v generation for every shot in the reel script.

    Skips shots whose output already exists or whose source image is missing;
    frame count per shot is derived from the matching audio's duration.
    Returns 0; a failing subprocess raises CalledProcessError.
    """
    args = parse_args()
    configure_logging(args.log_level)
    model_path = args.hunyuan_dir / "ckpts"
    args.videos_dir.mkdir(parents=True, exist_ok=True)
    env = os.environ.copy()
    # Reduce CUDA allocator fragmentation during long generation runs.
    env["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True,max_split_size_mb:128"
    data = json.loads(args.reel_script.read_text())
    shots = data.get("shots", [])
    LOGGER.info("Found %s shots to generate", len(shots))
    for shot in shots:
        shot_number = shot["shot_number"]
        # Tabs/newlines would break the single-line CLI prompt argument.
        prompt = str(shot["image_description"]).replace("\t", " ").replace("\n", " ")
        image_path = args.images_dir / f"shot_{shot_number}.png"
        output_path = args.videos_dir / f"output_{shot_number}.mp4"
        audio_path = args.audios_dir / f"output_{shot_number}.mp3"
        if not audio_path.exists():
            LOGGER.warning("No audio found at %s, falling back to 5s default", audio_path)
            duration = 5.0
        else:
            duration = get_audio_duration(audio_path)
            LOGGER.info("Audio duration for shot %s: %ss", shot_number, duration)
        video_length = duration_to_video_length(duration)
        LOGGER.info("Shot %s | %ss -> %s frames", shot_number, duration, video_length)
        LOGGER.info("Prompt: %s", prompt)
        LOGGER.info("Image: %s", image_path)
        LOGGER.info("Audio: %s", audio_path)
        LOGGER.info("Output: %s", output_path)
        if output_path.exists():
            # Idempotent re-runs: never regenerate an existing shot.
            LOGGER.info("Output path already exists, skipping")
            continue
        if not image_path.exists():
            LOGGER.warning("Image not found at %s, skipped", image_path)
            continue
        # NOTE(review): this empty_cache runs in a *fresh* subprocess, so it
        # cannot free memory held by any other process — it looks like a
        # no-op; confirm intent or drop it.
        subprocess.run(
            [
                "python3",
                "-c",
                "import torch; torch.cuda.empty_cache()",
            ],
            check=True,
            env=env,
        )
        LOGGER.info("GPU cache cleared")
        # Launch Hunyuan's generate.py via torchrun from inside its repo
        # (cwd) so its relative imports/paths resolve.
        subprocess.run(
            [
                "torchrun",
                "--nproc_per_node=1",
                "generate.py",
                "--prompt",
                prompt,
                "--image_path",
                str(image_path),
                "--resolution",
                "480p",
                "--aspect_ratio",
                "16:9",
                "--seed",
                str(args.seed),
                "--video_length",
                str(video_length),
                "--rewrite",
                "false",
                "--cfg_distilled",
                "true",
                "--enable_step_distill",
                "true",
                "--sparse_attn",
                "false",
                "--use_sageattn",
                "true",
                "--enable_cache",
                "false",
                "--overlap_group_offloading",
                "true",
                "--sr",
                "false",
                "--output_path",
                str(output_path),
                "--model_path",
                str(model_path),
            ],
            check=True,
            cwd=args.hunyuan_dir,
            env=env,
        )
        LOGGER.info("Shot %s done", shot_number)
    LOGGER.info("Done")
    return 0
if __name__ == "__main__":
raise SystemExit(main())

View File

@@ -0,0 +1,13 @@
from __future__ import annotations
import logging
# Shared format: timestamp | level | logger name | message.
DEFAULT_LOG_FORMAT = "%(asctime)s | %(levelname)s | %(name)s | %(message)s"


def configure_logging(level: str = "INFO") -> None:
    """Initialise root logging with the shared pipeline format.

    Unknown level names fall back to INFO instead of raising.
    """
    resolved = getattr(logging, level.upper(), logging.INFO)
    logging.basicConfig(level=resolved, format=DEFAULT_LOG_FORMAT)

View File

@@ -0,0 +1,85 @@
#!/usr/bin/env python3
"""Merge videos/output_n.mp4 with audios/output_n.mp3 into merged/merged_n.mp4."""
from __future__ import annotations
import argparse
import logging
import re
import subprocess
from pathlib import Path
from logging_config import configure_logging
SCRIPT_DIR = Path(__file__).resolve().parent
DEFAULT_BASE_DIR = SCRIPT_DIR.parents[1]
DEFAULT_VIDEOS_DIR = DEFAULT_BASE_DIR / "videos"
DEFAULT_AUDIOS_DIR = DEFAULT_BASE_DIR / "audios"
DEFAULT_OUTPUT_DIR = DEFAULT_BASE_DIR / "merged"
LOGGER = logging.getLogger(__name__)
def shot_number(path: Path) -> int:
    """Extract the numeric index from an output_<n>.mp4 filename (-1 if absent)."""
    found = re.search(r"output_(\d+)\.mp4$", path.name)
    if found is None:
        return -1
    return int(found.group(1))
def parse_args() -> argparse.Namespace:
    """Build and parse CLI options for the audio/video merge step."""
    cli = argparse.ArgumentParser(description=__doc__)
    cli.add_argument("--videos-dir", type=Path, default=DEFAULT_VIDEOS_DIR)
    cli.add_argument("--audios-dir", type=Path, default=DEFAULT_AUDIOS_DIR)
    cli.add_argument("--output-dir", type=Path, default=DEFAULT_OUTPUT_DIR)
    cli.add_argument("--log-level", default="INFO")
    return cli.parse_args()
def main() -> int:
    """Mux each output_<n>.mp4 with output_<n>.mp3 into merged_<n>.mp4.

    Returns 0 on success, 1 when no input videos exist. Shots lacking audio
    or already merged are skipped.
    """
    args = parse_args()
    configure_logging(args.log_level)
    args.output_dir.mkdir(parents=True, exist_ok=True)
    # Sort numerically by shot index, not lexicographically.
    videos = sorted(args.videos_dir.glob("output_*.mp4"), key=shot_number)
    if not videos:
        LOGGER.warning("No videos found in %s", args.videos_dir)
        return 1
    for video in videos:
        num = shot_number(video)
        audio = args.audios_dir / f"output_{num}.mp3"
        output = args.output_dir / f"merged_{num}.mp4"
        if not audio.exists():
            LOGGER.warning("No audio found for shot %s (%s); skipped", num, audio)
            continue
        if output.exists():
            # Idempotent re-runs: never re-encode an existing merge.
            LOGGER.info("Already exists; skipped shot %s", num)
            continue
        LOGGER.info("Merging shot %s: %s + %s -> %s", num, video, audio, output)
        # Copy video stream untouched, re-encode audio to AAC; -shortest
        # trims output to the shorter of the two streams.
        subprocess.run(
            [
                "ffmpeg",
                "-i",
                str(video),
                "-i",
                str(audio),
                "-c:v",
                "copy",
                "-c:a",
                "aac",
                "-shortest",
                "-y",
                str(output),
            ],
            check=True,
        )
        LOGGER.info("Done: %s", output)
    return 0
if __name__ == "__main__":
raise SystemExit(main())

View File

@@ -0,0 +1,163 @@
#!/usr/bin/env python3
"""Run the full video pipeline: generate, merge, and concatenate."""
from __future__ import annotations
import argparse
import logging
import os
import subprocess
import sys
from pathlib import Path
from logging_config import configure_logging
from s3_video_storage import S3VideoStorage
SCRIPT_DIR = Path(__file__).resolve().parent
PROJECT_ROOT = SCRIPT_DIR.parents[1]
DEFAULT_BASE_DIR = PROJECT_ROOT
DEFAULT_HUNYUAN_DIR = DEFAULT_BASE_DIR / "HunyuanVideo-1.5"
DEFAULT_REEL_SCRIPT = DEFAULT_BASE_DIR / "reel_script.json"
DEFAULT_IMAGES_DIR = DEFAULT_BASE_DIR / "images"
DEFAULT_VIDEOS_DIR = DEFAULT_BASE_DIR / "videos"
DEFAULT_AUDIOS_DIR = DEFAULT_BASE_DIR / "audios"
DEFAULT_MERGED_DIR = DEFAULT_BASE_DIR / "merged"
DEFAULT_OUTPUT = DEFAULT_BASE_DIR / "results" / "final_output.mp4"
LOGGER = logging.getLogger(__name__)
def parse_args() -> argparse.Namespace:
    """Assemble and parse the pipeline's command-line interface."""
    p = argparse.ArgumentParser(description=__doc__)
    # Filesystem locations; main() re-derives these when --base-dir changes.
    for flag, default in (
        ("--base-dir", DEFAULT_BASE_DIR),
        ("--hunyuan-dir", DEFAULT_HUNYUAN_DIR),
        ("--reel-script", DEFAULT_REEL_SCRIPT),
        ("--images-dir", DEFAULT_IMAGES_DIR),
        ("--videos-dir", DEFAULT_VIDEOS_DIR),
        ("--audios-dir", DEFAULT_AUDIOS_DIR),
        ("--merged-dir", DEFAULT_MERGED_DIR),
        ("--output", DEFAULT_OUTPUT),
    ):
        p.add_argument(flag, type=Path, default=default)
    p.add_argument("--seed", type=int, default=1)
    # Stage toggles for partial runs.
    for flag in ("--skip-generate", "--skip-merge", "--skip-concat", "--skip-s3-upload"):
        p.add_argument(flag, action="store_true")
    p.add_argument("--log-level", default="INFO")
    return p.parse_args()
def run_step(name: str, cmd: list[str]) -> None:
    """Announce and run one pipeline stage; CalledProcessError propagates."""
    LOGGER.info("=== %s ===", name)
    LOGGER.info("$ %s", " ".join(map(str, cmd)))
    subprocess.run(cmd, check=True)
def maybe_upload_to_s3(output_path: Path) -> None:
    """Upload the final video to S3 when AWS_S3_BUCKET is configured.

    Skips with a warning (rather than failing) when the bucket is unset, so
    local runs work without AWS credentials.
    """
    bucket = os.getenv("AWS_S3_BUCKET")
    if not bucket:
        LOGGER.warning("Skipping S3 upload: AWS_S3_BUCKET is not set")
        return
    # Unset optional vars resolve to None — presumably S3VideoStorage/boto3
    # fall back to their defaults; TODO confirm against its constructor.
    storage = S3VideoStorage(
        {
            "bucket_name": bucket,
            "region_name": os.getenv("AWS_REGION"),
            "endpoint_url": os.getenv("AWS_S3_ENDPOINT_URL"),
            "aws_access_key_id": os.getenv("AWS_ACCESS_KEY_ID"),
            "aws_secret_access_key": os.getenv("AWS_SECRET_ACCESS_KEY"),
            "aws_session_token": os.getenv("AWS_SESSION_TOKEN"),
        }
    )
    s3_uri = storage.store_file(output_path)
    LOGGER.info("Uploaded output to %s", s3_uri)
def main() -> int:
    """Run generate -> merge -> concat, then optionally upload to S3.

    Returns a process exit code: 0 on success, the failing subprocess's
    returncode if a pipeline stage fails, 1 if the S3 upload fails.
    """
    args = parse_args()
    configure_logging(args.log_level)
    # If only base-dir is overridden, derive the common subpaths from it.
    if args.base_dir != DEFAULT_BASE_DIR:
        if args.hunyuan_dir == DEFAULT_HUNYUAN_DIR:
            args.hunyuan_dir = args.base_dir / "HunyuanVideo-1.5"
        if args.reel_script == DEFAULT_REEL_SCRIPT:
            args.reel_script = args.base_dir / "reel_script.json"
        if args.images_dir == DEFAULT_IMAGES_DIR:
            args.images_dir = args.base_dir / "images"
        if args.videos_dir == DEFAULT_VIDEOS_DIR:
            args.videos_dir = args.base_dir / "videos"
        if args.audios_dir == DEFAULT_AUDIOS_DIR:
            args.audios_dir = args.base_dir / "audios"
        if args.merged_dir == DEFAULT_MERGED_DIR:
            args.merged_dir = args.base_dir / "merged"
        if args.output == DEFAULT_OUTPUT:
            args.output = args.base_dir / "results" / "final_output.mp4"
    try:
        # Each stage runs as a child process so heavy GPU state is released
        # between steps; settings are forwarded explicitly as CLI flags.
        if not args.skip_generate:
            run_step(
                "Generate Videos",
                [
                    sys.executable,
                    str(SCRIPT_DIR / "generate_videos.py"),
                    "--hunyuan-dir",
                    str(args.hunyuan_dir),
                    "--reel-script",
                    str(args.reel_script),
                    "--images-dir",
                    str(args.images_dir),
                    "--videos-dir",
                    str(args.videos_dir),
                    "--audios-dir",
                    str(args.audios_dir),
                    "--seed",
                    str(args.seed),
                ],
            )
        if not args.skip_merge:
            run_step(
                "Merge Audio + Video",
                [
                    sys.executable,
                    str(SCRIPT_DIR / "merge_audio_video.py"),
                    "--videos-dir",
                    str(args.videos_dir),
                    "--audios-dir",
                    str(args.audios_dir),
                    "--output-dir",
                    str(args.merged_dir),
                ],
            )
        if not args.skip_concat:
            run_step(
                "Concatenate Merged Videos",
                [
                    sys.executable,
                    str(SCRIPT_DIR / "concat_merged.py"),
                    "--merged-dir",
                    str(args.merged_dir),
                    "--output",
                    str(args.output),
                ],
            )
    except subprocess.CalledProcessError as exc:
        # Propagate the failing stage's exit code to the caller/CI.
        LOGGER.exception("Pipeline failed at command: %s", exc.cmd)
        return exc.returncode
    if not args.skip_s3_upload:
        try:
            maybe_upload_to_s3(args.output)
        except Exception:
            # The video was still produced locally; signal the upload
            # failure via exit code 1 without re-raising.
            LOGGER.exception("Failed uploading output to S3")
            return 1
    LOGGER.info("Pipeline complete")
    LOGGER.info("Final output: %s", args.output)
    return 0
if __name__ == "__main__":
raise SystemExit(main())