forked from LiveCarta/ContentGeneration
Refactored code, added a Dockerfile, replaced bash scripts with Python alternatives, and added a README with instructions for running the pipeline
This commit is contained in:
@@ -42,6 +42,7 @@ Thumbs.db
|
||||
# Local env and logs
|
||||
.env
|
||||
.env.*
|
||||
!.env.example
|
||||
*.log
|
||||
*.pid
|
||||
|
||||
@@ -50,3 +51,13 @@ Thumbs.db
|
||||
*.mov
|
||||
*.avi
|
||||
*.mkv
|
||||
|
||||
# Project generated data and checkpoints
|
||||
images/
|
||||
audios/
|
||||
videos/
|
||||
merged/
|
||||
results/
|
||||
outputs/
|
||||
ckpts/
|
||||
HunyuanVideo-1.5/ckpts/
|
||||
|
||||
1
.env
1
.env
@@ -1 +0,0 @@
|
||||
ELEVENLABS_API_KEY=<REDACTED — a live secret was committed here; it remains in git history, so rotate this ElevenLabs key immediately>
|
||||
19
.env.example
Normal file
19
.env.example
Normal file
@@ -0,0 +1,19 @@
|
||||
# ElevenLabs
|
||||
ELEVENLABS_API_KEY=
|
||||
|
||||
# Hugging Face (required for gated model downloads, e.g. FLUX.1-schnell)
|
||||
HUGGINGFACE_HUB_TOKEN=
|
||||
|
||||
# Hunyuan prompt rewrite endpoints (optional; rewrite is disabled in current generate_videos.py)
|
||||
T2V_REWRITE_BASE_URL=
|
||||
T2V_REWRITE_MODEL_NAME=
|
||||
I2V_REWRITE_BASE_URL=
|
||||
I2V_REWRITE_MODEL_NAME=
|
||||
|
||||
# AWS / S3 (used when initializing S3VideoStorage)
|
||||
AWS_ACCESS_KEY_ID=
|
||||
AWS_SECRET_ACCESS_KEY=
|
||||
AWS_SESSION_TOKEN=
|
||||
AWS_REGION=
|
||||
AWS_S3_BUCKET=
|
||||
AWS_S3_ENDPOINT_URL=
|
||||
1
.gitignore
vendored
1
.gitignore
vendored
@@ -58,6 +58,7 @@ Thumbs.db
|
||||
# Local environment variables
|
||||
.env
|
||||
.env.*
|
||||
!.env.example
|
||||
|
||||
# Project-specific artifacts
|
||||
*.mp4
|
||||
|
||||
66
Dockerfile
Normal file
66
Dockerfile
Normal file
@@ -0,0 +1,66 @@
|
||||
FROM nvidia/cuda:12.4.1-cudnn-devel-ubuntu22.04
|
||||
|
||||
ENV DEBIAN_FRONTEND=noninteractive \
|
||||
PYTHONUNBUFFERED=1 \
|
||||
PIP_NO_CACHE_DIR=1 \
|
||||
PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True,max_split_size_mb:128
|
||||
|
||||
# Base OS tools + media stack + Python toolchain.
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
python3.10 \
|
||||
python3-pip \
|
||||
python3.10-dev \
|
||||
python3.10-venv \
|
||||
ffmpeg \
|
||||
git \
|
||||
git-lfs \
|
||||
ca-certificates \
|
||||
curl \
|
||||
build-essential \
|
||||
pkg-config \
|
||||
ninja-build \
|
||||
libglib2.0-0 \
|
||||
libgl1 \
|
||||
&& rm -rf /var/lib/apt/lists/* \
|
||||
&& ln -sf /usr/bin/python3.10 /usr/bin/python \
|
||||
&& ln -sf /usr/bin/pip3 /usr/bin/pip \
|
||||
&& git lfs install
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
# Install project Python dependencies first for better layer caching.
|
||||
COPY requirements.txt /app/requirements.txt
|
||||
|
||||
RUN python -m pip install --upgrade pip setuptools wheel \
|
||||
&& pip install --index-url https://download.pytorch.org/whl/cu121 torch torchvision torchaudio \
|
||||
&& pip install -r /app/requirements.txt \
|
||||
&& pip install -U accelerate safetensors
|
||||
|
||||
# Copy project code.
|
||||
COPY . /app
|
||||
|
||||
# Ensure HunyuanVideo source exists in the image.
|
||||
ARG HUNYUAN_REPO=https://github.com/Tencent-Hunyuan/HunyuanVideo-1.5.git
|
||||
RUN if [ ! -f /app/HunyuanVideo-1.5/requirements.txt ]; then \
|
||||
rm -rf /app/HunyuanVideo-1.5 && \
|
||||
git clone --depth 1 "$HUNYUAN_REPO" /app/HunyuanVideo-1.5; \
|
||||
fi
|
||||
|
||||
# Install HunyuanVideo dependencies from upstream README guidance.
|
||||
RUN pip install -r /app/HunyuanVideo-1.5/requirements.txt \
|
||||
&& pip install --upgrade tencentcloud-sdk-python \
|
||||
&& pip install sgl-kernel==0.3.18
|
||||
|
||||
# Optional attention backends from Hunyuan docs.
|
||||
# Build with: --build-arg INSTALL_OPTIONAL_ATTENTION=1
|
||||
ARG INSTALL_OPTIONAL_ATTENTION=0
|
||||
RUN if [ "$INSTALL_OPTIONAL_ATTENTION" = "1" ]; then \
|
||||
pip install flash-attn --no-build-isolation && \
|
||||
git clone --depth 1 https://github.com/Tencent-Hunyuan/flex-block-attn.git /tmp/flex-block-attn && \
|
||||
cd /tmp/flex-block-attn && git submodule update --init --recursive && python setup.py install && \
|
||||
git clone --depth 1 https://github.com/cooper1637/SageAttention.git /tmp/SageAttention && \
|
||||
cd /tmp/SageAttention && python setup.py install; \
|
||||
fi
|
||||
|
||||
# Default pipeline entrypoint.
|
||||
CMD ["python", "run_video_pipeline.py"]
|
||||
202
README.md
Normal file
202
README.md
Normal file
@@ -0,0 +1,202 @@
|
||||
# ContentGeneration Pipeline
|
||||
|
||||
This project runs a 3-step video pipeline:
|
||||
|
||||
1. Generate shot videos from images + prompts.
|
||||
2. Merge each generated video with its audio.
|
||||
3. Concatenate merged clips into one final output.
|
||||
|
||||
The pipeline entrypoint is `run_video_pipeline.py`.
|
||||
|
||||
## Quick Start
|
||||
|
||||
Local Python:
|
||||
|
||||
```bash
|
||||
cp .env.example .env
|
||||
python3 -m venv .venv && source .venv/bin/activate
|
||||
pip install -r requirements.txt
|
||||
python run_video_pipeline.py
|
||||
```
|
||||
|
||||
Docker (GPU):
|
||||
|
||||
```bash
|
||||
cp .env.example .env
|
||||
docker build -t content-generation:latest .
|
||||
docker run --rm --gpus all --env-file .env -v "$(pwd)":/app -w /app content-generation:latest
|
||||
```
|
||||
|
||||
First run (skip S3 upload):
|
||||
|
||||
```bash
|
||||
python run_video_pipeline.py --skip-s3-upload
|
||||
```
|
||||
|
||||
Docker first run (skip S3 upload):
|
||||
|
||||
```bash
|
||||
docker run --rm --gpus all --env-file .env -v "$(pwd)":/app -w /app content-generation:latest \
|
||||
python run_video_pipeline.py --skip-s3-upload
|
||||
```
|
||||
|
||||
## Project Layout
|
||||
|
||||
- `run_video_pipeline.py`: main entrypoint.
|
||||
- `src/scripts/`: helper scripts used by the pipeline.
|
||||
- `HunyuanVideo-1.5/`: Hunyuan inference code and model dependencies.
|
||||
- `reel_script.json`: required script input with `shots`.
|
||||
- `images/`, `audios/`, `videos/`, `merged/`, `results/`: working/output folders.
|
||||
- `.env.example`: environment variable template.
|
||||
|
||||
## Prerequisites
|
||||
|
||||
1. Linux with NVIDIA GPU and CUDA runtime.
|
||||
2. `ffmpeg` and `ffprobe` available on PATH.
|
||||
3. Python 3.10+.
|
||||
4. Hunyuan model checkpoints under `HunyuanVideo-1.5/ckpts`.
|
||||
5. If using FLUX local download, access approved for `black-forest-labs/FLUX.1-schnell`.
|
||||
|
||||
## Environment Variables
|
||||
|
||||
1. Create local env file:
|
||||
|
||||
```bash
|
||||
cp .env.example .env
|
||||
```
|
||||
|
||||
2. Fill required variables in `.env`:
|
||||
- `ELEVENLABS_API_KEY` for audio generation.
|
||||
- `HUGGINGFACE_HUB_TOKEN` if gated Hugging Face model access is needed.
|
||||
- `AWS_S3_BUCKET` (+ optional AWS vars) if you want final output uploaded to S3.
|
||||
|
||||
## Run Locally (Python)
|
||||
|
||||
1. Create and activate a virtual environment:
|
||||
|
||||
```bash
|
||||
python3 -m venv .venv
|
||||
source .venv/bin/activate
|
||||
```
|
||||
|
||||
2. Install Python dependencies:
|
||||
|
||||
```bash
|
||||
python -m pip install --upgrade pip
|
||||
pip install -r requirements.txt
|
||||
```
|
||||
|
||||
3. Install Hunyuan dependencies:
|
||||
|
||||
```bash
|
||||
pip install -r HunyuanVideo-1.5/requirements.txt
|
||||
pip install --upgrade tencentcloud-sdk-python
|
||||
pip install sgl-kernel==0.3.18
|
||||
```
|
||||
|
||||
4. Run full pipeline:
|
||||
|
||||
```bash
|
||||
python run_video_pipeline.py
|
||||
```
|
||||
|
||||
5. Common options:
|
||||
|
||||
```bash
|
||||
# Skip generation and only merge + concat
|
||||
python run_video_pipeline.py --skip-generate
|
||||
|
||||
# Skip S3 upload
|
||||
python run_video_pipeline.py --skip-s3-upload
|
||||
|
||||
# Override base directory
|
||||
python run_video_pipeline.py --base-dir /absolute/path/to/workdir
|
||||
|
||||
# Change logging verbosity
|
||||
python run_video_pipeline.py --log-level DEBUG
|
||||
```
|
||||
|
||||
## Run with Docker
|
||||
|
||||
1. Build image:
|
||||
|
||||
```bash
|
||||
docker build -t content-generation:latest .
|
||||
```
|
||||
|
||||
2. Optional build with extra attention backends:
|
||||
|
||||
```bash
|
||||
docker build -t content-generation:latest --build-arg INSTALL_OPTIONAL_ATTENTION=1 .
|
||||
```
|
||||
|
||||
3. Run pipeline in container (GPU required):
|
||||
|
||||
```bash
|
||||
docker run --rm --gpus all \
|
||||
--env-file .env \
|
||||
-v "$(pwd)":/app \
|
||||
-w /app \
|
||||
content-generation:latest
|
||||
```
|
||||
|
||||
4. Pass extra pipeline args:
|
||||
|
||||
```bash
|
||||
docker run --rm --gpus all \
|
||||
--env-file .env \
|
||||
-v "$(pwd)":/app \
|
||||
-w /app \
|
||||
content-generation:latest \
|
||||
python run_video_pipeline.py --skip-s3-upload --log-level DEBUG
|
||||
```
|
||||
|
||||
## Input Expectations
|
||||
|
||||
1. `reel_script.json` must exist and contain a `shots` array.
|
||||
2. `images/shot_<n>.png` and `audios/output_<n>.mp3` should align by shot number.
|
||||
3. Final output is written by default to `results/final_output.mp4`.
|
||||
|
||||
## S3 Upload Behavior
|
||||
|
||||
1. If `AWS_S3_BUCKET` is set, the pipeline uploads final output to S3 using `S3VideoStorage`.
|
||||
2. If `AWS_S3_BUCKET` is missing, upload is skipped with a warning.
|
||||
3. Disable upload explicitly with `--skip-s3-upload`.
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
1. `torch.cuda.is_available()` is false in Docker.
|
||||
- Run with GPU flags: `docker run --gpus all ...`
|
||||
- Verify NVIDIA Container Toolkit is installed on host.
|
||||
- Check host GPU visibility: `nvidia-smi`.
|
||||
|
||||
2. `ffmpeg` or `ffprobe` not found.
|
||||
- Local: install ffmpeg with your package manager.
|
||||
- Docker: ffmpeg is installed in the provided Dockerfile.
|
||||
|
||||
3. Hunyuan generate step fails due to missing checkpoints.
|
||||
- Ensure checkpoints are available under `HunyuanVideo-1.5/ckpts`.
|
||||
- Confirm mounted project path in Docker includes checkpoints.
|
||||
|
||||
4. Hugging Face model download fails (401/403).
|
||||
- Accept model access terms for gated models (for example FLUX.1-schnell).
|
||||
- Set `HUGGINGFACE_HUB_TOKEN` in `.env`.
|
||||
|
||||
5. S3 upload fails.
|
||||
- Confirm `AWS_S3_BUCKET` is set.
|
||||
- If needed, set `AWS_REGION` and credentials (`AWS_ACCESS_KEY_ID`, `AWS_SECRET_ACCESS_KEY`, optional `AWS_SESSION_TOKEN`).
|
||||
- For S3-compatible providers, set `AWS_S3_ENDPOINT_URL`.
|
||||
|
||||
6. Permission issues when running Docker with mounted volumes.
|
||||
- Use your host user mapping if needed:
|
||||
`docker run --rm --gpus all -u "$(id -u):$(id -g)" ...`
|
||||
|
||||
7. Out-of-memory during video generation.
|
||||
- Keep `PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True,max_split_size_mb:128`.
|
||||
- Reduce workload by skipping optional enhancements or lowering resolution/steps in generation scripts.
|
||||
|
||||
8. Verify syntax quickly before running.
|
||||
|
||||
```bash
|
||||
python3 -m py_compile run_video_pipeline.py src/scripts/*.py
|
||||
```
|
||||
@@ -1,35 +0,0 @@
|
||||
from elevenlabs.client import ElevenLabs
|
||||
from elevenlabs.play import play
|
||||
import os
|
||||
import json
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv()
|
||||
ELEVENLABS_API_KEY = os.getenv('ELEVENLABS_API_KEY')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
script_path = "reel_script.json"
|
||||
with open(script_path, "r") as f:
|
||||
reel_data = json.load(f)
|
||||
|
||||
client = ElevenLabs(
|
||||
api_key=ELEVENLABS_API_KEY
|
||||
)
|
||||
for shot in reel_data["shots"]:
|
||||
print(shot["shot_number"], shot["voiceover"])
|
||||
prompt = shot["voiceover"]
|
||||
audio = client.text_to_speech.convert(
|
||||
text=prompt,
|
||||
voice_id="JBFqnCBsd6RMkjVDRZzb",
|
||||
model_id="eleven_multilingual_v2",
|
||||
output_format="mp3_44100_128",
|
||||
)
|
||||
|
||||
audio_bytes = b"".join(audio)
|
||||
|
||||
if not os.path.exists("audios"):
|
||||
os.makedirs("audios")
|
||||
with open(f"audios/output_{shot["shot_number"]}.mp3", "wb") as f:
|
||||
f.write(audio_bytes)
|
||||
@@ -1,28 +0,0 @@
|
||||
import torch
|
||||
from diffusers import FluxPipeline
|
||||
import json
|
||||
import os
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
script_path = "reel_script.json"
|
||||
with open(script_path, "r") as f:
|
||||
reel_data = json.load(f)
|
||||
|
||||
pipe = FluxPipeline.from_pretrained("black-forest-labs/FLUX.1-schnell", torch_dtype=torch.bfloat16)
|
||||
pipe.enable_model_cpu_offload()
|
||||
|
||||
for shot in reel_data["shots"]:
|
||||
print(shot["shot_number"], shot["image_description"])
|
||||
prompt = shot["image_description"]
|
||||
image = pipe(
|
||||
prompt,
|
||||
guidance_scale=0.0,
|
||||
num_inference_steps=4,
|
||||
max_sequence_length=256,
|
||||
generator=torch.Generator("cpu").manual_seed(0)
|
||||
).images[0]
|
||||
|
||||
if not os.path.exists("images"):
|
||||
os.makedirs("images")
|
||||
image.save(f"images/shot_{shot["shot_number"]}.png")
|
||||
17
requirements.txt
Normal file
17
requirements.txt
Normal file
@@ -0,0 +1,17 @@
|
||||
# Core project dependencies inferred from imports in this workspace
|
||||
boto3
|
||||
python-dotenv
|
||||
elevenlabs
|
||||
torch
|
||||
transformers
|
||||
diffusers
|
||||
accelerate
|
||||
safetensors
|
||||
huggingface-hub
|
||||
|
||||
# Optional but commonly required for 4-bit quantization with BitsAndBytesConfig
|
||||
bitsandbytes
|
||||
|
||||
# Notes:
|
||||
# - ffmpeg/ffprobe are required by video scripts but installed at OS level, not via pip.
|
||||
# - torchrun is provided by the torch package.
|
||||
163
run_video_pipeline.py
Normal file
163
run_video_pipeline.py
Normal file
@@ -0,0 +1,163 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Run the full video pipeline: generate, merge, and concatenate."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import logging
|
||||
import os
|
||||
import subprocess
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
from src.scripts.logging_config import configure_logging
|
||||
from src.scripts.s3_video_storage import S3VideoStorage
|
||||
|
||||
|
||||
PROJECT_ROOT = Path(__file__).resolve().parent
|
||||
SCRIPT_DIR = PROJECT_ROOT / "src" / "scripts"
|
||||
DEFAULT_BASE_DIR = PROJECT_ROOT
|
||||
DEFAULT_HUNYUAN_DIR = DEFAULT_BASE_DIR / "HunyuanVideo-1.5"
|
||||
DEFAULT_REEL_SCRIPT = DEFAULT_BASE_DIR / "reel_script.json"
|
||||
DEFAULT_IMAGES_DIR = DEFAULT_BASE_DIR / "images"
|
||||
DEFAULT_VIDEOS_DIR = DEFAULT_BASE_DIR / "videos"
|
||||
DEFAULT_AUDIOS_DIR = DEFAULT_BASE_DIR / "audios"
|
||||
DEFAULT_MERGED_DIR = DEFAULT_BASE_DIR / "merged"
|
||||
DEFAULT_OUTPUT = DEFAULT_BASE_DIR / "results" / "final_output.mp4"
|
||||
|
||||
LOGGER = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def parse_args() -> argparse.Namespace:
    """Build and parse the pipeline's command-line interface.

    All path options default to locations under the project root; the
    remaining flags control which stages run and how verbose logging is.
    """
    cli = argparse.ArgumentParser(description=__doc__)

    # Path-valued options share the same shape, so declare them as data.
    path_options = (
        ("--base-dir", DEFAULT_BASE_DIR),
        ("--hunyuan-dir", DEFAULT_HUNYUAN_DIR),
        ("--reel-script", DEFAULT_REEL_SCRIPT),
        ("--images-dir", DEFAULT_IMAGES_DIR),
        ("--videos-dir", DEFAULT_VIDEOS_DIR),
        ("--audios-dir", DEFAULT_AUDIOS_DIR),
        ("--merged-dir", DEFAULT_MERGED_DIR),
        ("--output", DEFAULT_OUTPUT),
    )
    for flag, default in path_options:
        cli.add_argument(flag, type=Path, default=default)

    cli.add_argument("--seed", type=int, default=1)
    for flag in ("--skip-generate", "--skip-merge", "--skip-concat", "--skip-s3-upload"):
        cli.add_argument(flag, action="store_true")
    cli.add_argument("--log-level", default="INFO")
    return cli.parse_args()
|
||||
|
||||
|
||||
def run_step(name: str, cmd: list[str]) -> None:
    """Announce and execute a single pipeline stage.

    Logs the stage banner and the exact command line, then runs it.
    Raises subprocess.CalledProcessError when the command exits non-zero.
    """
    LOGGER.info("=== %s ===", name)
    LOGGER.info("$ %s", " ".join(map(str, cmd)))
    subprocess.run(cmd, check=True)
|
||||
|
||||
|
||||
def maybe_upload_to_s3(output_path: Path) -> None:
    """Upload the final video to S3 when AWS_S3_BUCKET is configured.

    Returns early (with a warning) when no bucket is set; any storage error
    propagates to the caller.
    """
    bucket = os.getenv("AWS_S3_BUCKET")
    if not bucket:
        LOGGER.warning("Skipping S3 upload: AWS_S3_BUCKET is not set")
        return

    # Assemble the storage config from the optional AWS environment variables;
    # unset ones are passed through as None, matching the original behavior.
    config = {"bucket_name": bucket}
    for key, env_var in (
        ("region_name", "AWS_REGION"),
        ("endpoint_url", "AWS_S3_ENDPOINT_URL"),
        ("aws_access_key_id", "AWS_ACCESS_KEY_ID"),
        ("aws_secret_access_key", "AWS_SECRET_ACCESS_KEY"),
        ("aws_session_token", "AWS_SESSION_TOKEN"),
    ):
        config[key] = os.getenv(env_var)

    storage = S3VideoStorage(config)
    s3_uri = storage.store_file(output_path)
    LOGGER.info("Uploaded output to %s", s3_uri)
|
||||
|
||||
|
||||
def main() -> int:
    """Run the pipeline: generate -> merge -> concat, then optionally upload.

    Returns 0 on success, the failing subprocess's return code on a stage
    failure, or 1 when the S3 upload raises.
    """
    args = parse_args()
    configure_logging(args.log_level)

    # When only --base-dir was overridden, re-anchor every path option that
    # is still at its default underneath the new base directory.
    if args.base_dir != DEFAULT_BASE_DIR:
        derived = {
            "hunyuan_dir": (DEFAULT_HUNYUAN_DIR, args.base_dir / "HunyuanVideo-1.5"),
            "reel_script": (DEFAULT_REEL_SCRIPT, args.base_dir / "reel_script.json"),
            "images_dir": (DEFAULT_IMAGES_DIR, args.base_dir / "images"),
            "videos_dir": (DEFAULT_VIDEOS_DIR, args.base_dir / "videos"),
            "audios_dir": (DEFAULT_AUDIOS_DIR, args.base_dir / "audios"),
            "merged_dir": (DEFAULT_MERGED_DIR, args.base_dir / "merged"),
            "output": (DEFAULT_OUTPUT, args.base_dir / "results" / "final_output.mp4"),
        }
        for attr, (default, replacement) in derived.items():
            if getattr(args, attr) == default:
                setattr(args, attr, replacement)

    try:
        if not args.skip_generate:
            run_step(
                "Generate Videos",
                [
                    sys.executable,
                    str(SCRIPT_DIR / "generate_videos.py"),
                    "--hunyuan-dir", str(args.hunyuan_dir),
                    "--reel-script", str(args.reel_script),
                    "--images-dir", str(args.images_dir),
                    "--videos-dir", str(args.videos_dir),
                    "--audios-dir", str(args.audios_dir),
                    "--seed", str(args.seed),
                ],
            )

        if not args.skip_merge:
            run_step(
                "Merge Audio + Video",
                [
                    sys.executable,
                    str(SCRIPT_DIR / "merge_audio_video.py"),
                    "--videos-dir", str(args.videos_dir),
                    "--audios-dir", str(args.audios_dir),
                    "--output-dir", str(args.merged_dir),
                ],
            )

        if not args.skip_concat:
            run_step(
                "Concatenate Merged Videos",
                [
                    sys.executable,
                    str(SCRIPT_DIR / "concat_merged.py"),
                    "--merged-dir", str(args.merged_dir),
                    "--output", str(args.output),
                ],
            )
    except subprocess.CalledProcessError as exc:
        LOGGER.exception("Pipeline failed at command: %s", exc.cmd)
        return exc.returncode

    if not args.skip_s3_upload:
        try:
            maybe_upload_to_s3(args.output)
        except Exception:
            # Upload failure is fatal for the run but the generated artifacts
            # are kept on disk for a retry.
            LOGGER.exception("Failed uploading output to S3")
            return 1

    LOGGER.info("Pipeline complete")
    LOGGER.info("Final output: %s", args.output)
    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
raise SystemExit(main())
|
||||
80
src/scripts/concat_merged.py
Normal file
80
src/scripts/concat_merged.py
Normal file
@@ -0,0 +1,80 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Concatenate merged_*.mp4 files into a single output using ffmpeg concat demuxer."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import logging
|
||||
import re
|
||||
import subprocess
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
|
||||
from logging_config import configure_logging
|
||||
|
||||
|
||||
SCRIPT_DIR = Path(__file__).resolve().parent
|
||||
DEFAULT_BASE_DIR = SCRIPT_DIR.parents[1]
|
||||
DEFAULT_MERGED_DIR = DEFAULT_BASE_DIR / "merged"
|
||||
DEFAULT_OUTPUT = DEFAULT_BASE_DIR / "results" / "run_3" / "final_output.mp4"
|
||||
|
||||
LOGGER = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def shot_number(path: Path) -> int:
    """Extract the shot index from a merged_<n>.mp4 filename (-1 if unmatched)."""
    found = re.search(r"merged_(\d+)\.mp4$", path.name)
    if found is None:
        return -1
    return int(found.group(1))
|
||||
|
||||
|
||||
def parse_args() -> argparse.Namespace:
    """Parse CLI options for the concat step."""
    cli = argparse.ArgumentParser(description=__doc__)
    cli.add_argument("--merged-dir", default=DEFAULT_MERGED_DIR, type=Path)
    cli.add_argument("--output", default=DEFAULT_OUTPUT, type=Path)
    cli.add_argument("--log-level", default="INFO")
    return cli.parse_args()
|
||||
|
||||
|
||||
def main() -> int:
    """Concatenate merged_*.mp4 clips, in shot order, into a single file.

    Uses ffmpeg's concat demuxer with a temporary list file.
    Returns 0 on success, 1 when no merged clips are found.
    """
    args = parse_args()
    configure_logging(args.log_level)

    videos = sorted(args.merged_dir.glob("merged_*.mp4"), key=shot_number)
    if not videos:
        LOGGER.warning("No merged videos found in %s", args.merged_dir)
        return 1

    args.output.parent.mkdir(parents=True, exist_ok=True)

    # The concat demuxer expects one "file '<path>'" entry per line.
    # BUG FIX: the original wrote a literal backslash-n ("\\n") between
    # entries, producing a single malformed line that ffmpeg cannot parse;
    # a real newline is required.
    with tempfile.NamedTemporaryFile(mode="w", suffix=".txt", delete=False) as tmp:
        filelist = Path(tmp.name)
        for video in videos:
            tmp.write(f"file '{video}'\n")

    try:
        LOGGER.info("Concatenating the following files:\n%s", filelist.read_text().rstrip())

        subprocess.run(
            [
                "ffmpeg",
                "-f", "concat",
                "-safe", "0",
                "-i", str(filelist),
                "-c", "copy",
                "-y",
                str(args.output),
            ],
            check=True,
        )
    finally:
        # Remove the temp list even when ffmpeg fails.
        filelist.unlink(missing_ok=True)

    LOGGER.info("Done")
    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
raise SystemExit(main())
|
||||
54
src/scripts/generate_audios.py
Normal file
54
src/scripts/generate_audios.py
Normal file
@@ -0,0 +1,54 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from elevenlabs.client import ElevenLabs
|
||||
from logging_config import configure_logging
|
||||
|
||||
|
||||
SCRIPT_DIR = Path(__file__).resolve().parent
|
||||
PROJECT_ROOT = SCRIPT_DIR.parents[1]
|
||||
|
||||
load_dotenv(PROJECT_ROOT / ".env")
|
||||
|
||||
LOGGER = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def main() -> int:
    """Synthesize one MP3 per shot voiceover via the ElevenLabs API.

    Reads reel_script.json from the project root and writes
    audios/output_<n>.mp3 for each shot.
    Raises RuntimeError when ELEVENLABS_API_KEY is not set.
    """
    configure_logging("INFO")
    api_key = os.getenv("ELEVENLABS_API_KEY")
    if not api_key:
        raise RuntimeError("ELEVENLABS_API_KEY is not set")

    reel_script = PROJECT_ROOT / "reel_script.json"
    audios_dir = PROJECT_ROOT / "audios"
    audios_dir.mkdir(parents=True, exist_ok=True)

    reel_data = json.loads(reel_script.read_text())
    client = ElevenLabs(api_key=api_key)

    for shot in reel_data["shots"]:
        shot_num = shot["shot_number"]
        prompt = shot["voiceover"]
        LOGGER.info("Generating audio for shot %s: %s", shot_num, prompt)

        # convert() yields audio chunks; join them into one MP3 payload.
        stream = client.text_to_speech.convert(
            text=prompt,
            voice_id="JBFqnCBsd6RMkjVDRZzb",
            model_id="eleven_multilingual_v2",
            output_format="mp3_44100_128",
        )
        (audios_dir / f"output_{shot_num}.mp3").write_bytes(b"".join(stream))

    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
raise SystemExit(main())
|
||||
50
src/scripts/generate_images.py
Normal file
50
src/scripts/generate_images.py
Normal file
@@ -0,0 +1,50 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
from pathlib import Path
|
||||
|
||||
import torch
|
||||
from diffusers import FluxPipeline
|
||||
from logging_config import configure_logging
|
||||
|
||||
|
||||
SCRIPT_DIR = Path(__file__).resolve().parent
|
||||
PROJECT_ROOT = SCRIPT_DIR.parents[1]
|
||||
|
||||
LOGGER = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def main() -> int:
    """Render one FLUX.1-schnell image per shot description.

    Reads reel_script.json from the project root and writes
    images/shot_<n>.png for each shot, with a fixed seed for repeatability.
    """
    configure_logging("INFO")
    reel_script = PROJECT_ROOT / "reel_script.json"
    images_dir = PROJECT_ROOT / "images"
    images_dir.mkdir(parents=True, exist_ok=True)

    reel_data = json.loads(reel_script.read_text())

    pipe = FluxPipeline.from_pretrained(
        "black-forest-labs/FLUX.1-schnell",
        torch_dtype=torch.bfloat16,
    )
    # CPU offload keeps VRAM usage down at some speed cost.
    pipe.enable_model_cpu_offload()

    for shot in reel_data["shots"]:
        shot_num = shot["shot_number"]
        prompt = shot["image_description"]
        LOGGER.info("Generating image for shot %s: %s", shot_num, prompt)

        result = pipe(
            prompt,
            guidance_scale=0.0,
            num_inference_steps=4,
            max_sequence_length=256,
            # Fixed CPU-side seed so reruns produce the same image per prompt.
            generator=torch.Generator("cpu").manual_seed(0),
        )
        result.images[0].save(images_dir / f"shot_{shot_num}.png")

    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
raise SystemExit(main())
|
||||
@@ -1,9 +1,15 @@
|
||||
import torch
|
||||
import json
|
||||
import logging
|
||||
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
|
||||
import re
|
||||
from typing import Optional
|
||||
|
||||
from logging_config import configure_logging
|
||||
|
||||
|
||||
LOGGER = logging.getLogger(__name__)
|
||||
|
||||
device = 'cuda' if torch.cuda.is_available() else 'cpu'
|
||||
MODEL_ID = "Qwen/Qwen3-14B"
|
||||
WORDS_PER_SECOND = 2.5
|
||||
@@ -174,7 +180,7 @@ def generate_reel_scenario(
|
||||
|
||||
inputs = tokenizer(text, return_tensors="pt").to(model.device)
|
||||
|
||||
print("Generating reel scenario..")
|
||||
LOGGER.info("Generating reel scenario")
|
||||
with torch.no_grad():
|
||||
output_ids = model.generate(
|
||||
**inputs,
|
||||
@@ -330,6 +336,7 @@ def parse_reel_scenario(raw_scenario: str) -> dict:
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
configure_logging("INFO")
|
||||
|
||||
with open("topic_description.txt", "r") as f:
|
||||
topic = f.read()
|
||||
171
src/scripts/generate_videos.py
Normal file
171
src/scripts/generate_videos.py
Normal file
@@ -0,0 +1,171 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Generate shot videos with HunyuanVideo based on reel script and audio durations."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
|
||||
from logging_config import configure_logging
|
||||
|
||||
|
||||
SCRIPT_DIR = Path(__file__).resolve().parent
|
||||
DEFAULT_BASE_DIR = SCRIPT_DIR.parents[1]
|
||||
DEFAULT_HUNYUAN_DIR = DEFAULT_BASE_DIR / "HunyuanVideo-1.5"
|
||||
DEFAULT_REEL_SCRIPT = DEFAULT_BASE_DIR / "reel_script.json"
|
||||
DEFAULT_IMAGES_DIR = DEFAULT_BASE_DIR / "images"
|
||||
DEFAULT_VIDEOS_DIR = DEFAULT_BASE_DIR / "videos"
|
||||
DEFAULT_AUDIOS_DIR = DEFAULT_BASE_DIR / "audios"
|
||||
|
||||
LOGGER = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def parse_args() -> argparse.Namespace:
    """Parse CLI options for the video-generation step."""
    cli = argparse.ArgumentParser(description=__doc__)
    # All path options share the same declaration shape.
    for flag, default in (
        ("--hunyuan-dir", DEFAULT_HUNYUAN_DIR),
        ("--reel-script", DEFAULT_REEL_SCRIPT),
        ("--images-dir", DEFAULT_IMAGES_DIR),
        ("--videos-dir", DEFAULT_VIDEOS_DIR),
        ("--audios-dir", DEFAULT_AUDIOS_DIR),
    ):
        cli.add_argument(flag, type=Path, default=default)
    cli.add_argument("--seed", type=int, default=1)
    cli.add_argument("--log-level", default="INFO")
    return cli.parse_args()
|
||||
|
||||
|
||||
def get_audio_duration(audio_path: Path) -> float:
    """Return the duration of *audio_path* in seconds, as reported by ffprobe.

    Raises subprocess.CalledProcessError if ffprobe fails, or ValueError if
    its output is not a parseable float.
    """
    probe_cmd = [
        "ffprobe",
        "-v", "error",
        "-show_entries", "format=duration",
        "-of", "default=noprint_wrappers=1:nokey=1",
        str(audio_path),
    ]
    probe = subprocess.run(probe_cmd, check=True, text=True, capture_output=True)
    return float(probe.stdout.strip())
|
||||
|
||||
|
||||
def duration_to_video_length(duration: float) -> int:
    """Map an audio duration (seconds) to an odd frame count at 24 fps.

    The count is forced odd and clamped to [49, 169].
    NOTE(review): the bounds look HunyuanVideo-specific — confirm against the
    generator's accepted --video_length values.
    """
    frames = int(duration * 24) + 1
    if frames % 2 == 0:
        frames += 1
    return min(max(frames, 49), 169)
|
||||
|
||||
|
||||
def main() -> int:
    """Drive HunyuanVideo generation for every shot in the reel script.

    For each shot: derive the target frame count from the matching audio
    clip's duration (5s fallback when absent), then invoke the Hunyuan
    generate.py via torchrun. Shots whose output already exists or whose
    source image is missing are skipped.
    """
    args = parse_args()
    configure_logging(args.log_level)
    model_path = args.hunyuan_dir / "ckpts"

    args.videos_dir.mkdir(parents=True, exist_ok=True)

    env = os.environ.copy()
    # Keep allocator settings aligned with the Dockerfile to reduce OOMs.
    env["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True,max_split_size_mb:128"

    data = json.loads(args.reel_script.read_text())
    shots = data.get("shots", [])
    LOGGER.info("Found %s shots to generate", len(shots))

    for shot in shots:
        shot_number = shot["shot_number"]
        # Flatten whitespace so the prompt survives CLI argument passing.
        prompt = str(shot["image_description"]).replace("\t", " ").replace("\n", " ")

        image_path = args.images_dir / f"shot_{shot_number}.png"
        output_path = args.videos_dir / f"output_{shot_number}.mp4"
        audio_path = args.audios_dir / f"output_{shot_number}.mp3"

        if not audio_path.exists():
            LOGGER.warning("No audio found at %s, falling back to 5s default", audio_path)
            duration = 5.0
        else:
            duration = get_audio_duration(audio_path)
            LOGGER.info("Audio duration for shot %s: %ss", shot_number, duration)

        video_length = duration_to_video_length(duration)

        LOGGER.info("Shot %s | %ss -> %s frames", shot_number, duration, video_length)
        LOGGER.info("Prompt: %s", prompt)
        LOGGER.info("Image: %s", image_path)
        LOGGER.info("Audio: %s", audio_path)
        LOGGER.info("Output: %s", output_path)

        if output_path.exists():
            LOGGER.info("Output path already exists, skipping")
            continue

        if not image_path.exists():
            LOGGER.warning("Image not found at %s, skipped", image_path)
            continue

        # NOTE(review): this runs torch.cuda.empty_cache() in a *separate*
        # process, which cannot affect any other process's allocator state —
        # confirm whether this step is actually needed.
        subprocess.run(
            ["python3", "-c", "import torch; torch.cuda.empty_cache()"],
            check=True,
            env=env,
        )
        LOGGER.info("GPU cache cleared")

        generate_cmd = [
            "torchrun",
            "--nproc_per_node=1",
            "generate.py",
            "--prompt", prompt,
            "--image_path", str(image_path),
            "--resolution", "480p",
            "--aspect_ratio", "16:9",
            "--seed", str(args.seed),
            "--video_length", str(video_length),
            "--rewrite", "false",
            "--cfg_distilled", "true",
            "--enable_step_distill", "true",
            "--sparse_attn", "false",
            "--use_sageattn", "true",
            "--enable_cache", "false",
            "--overlap_group_offloading", "true",
            "--sr", "false",
            "--output_path", str(output_path),
            "--model_path", str(model_path),
        ]
        # generate.py resolves paths relative to the Hunyuan checkout.
        subprocess.run(generate_cmd, check=True, cwd=args.hunyuan_dir, env=env)

        LOGGER.info("Shot %s done", shot_number)

    LOGGER.info("Done")
    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
raise SystemExit(main())
|
||||
13
src/scripts/logging_config.py
Normal file
13
src/scripts/logging_config.py
Normal file
@@ -0,0 +1,13 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
|
||||
|
||||
# Shared log-line layout used by every pipeline script.
DEFAULT_LOG_FORMAT = "%(asctime)s | %(levelname)s | %(name)s | %(message)s"


def configure_logging(level: str = "INFO") -> None:
    """Initialise root logging with the shared pipeline format.

    Unrecognised level names silently fall back to INFO.
    """
    resolved = getattr(logging, level.upper(), logging.INFO)
    logging.basicConfig(level=resolved, format=DEFAULT_LOG_FORMAT)
|
||||
85
src/scripts/merge_audio_video.py
Normal file
85
src/scripts/merge_audio_video.py
Normal file
@@ -0,0 +1,85 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Merge videos/output_n.mp4 with audios/output_n.mp3 into merged/merged_n.mp4."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import logging
|
||||
import re
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
|
||||
from logging_config import configure_logging
|
||||
|
||||
|
||||
# This file lives in src/scripts/, so parents[1] is two levels up — the
# directory containing src/ (used as the project base for data folders).
SCRIPT_DIR = Path(__file__).resolve().parent
DEFAULT_BASE_DIR = SCRIPT_DIR.parents[1]
# Default locations of the per-shot inputs and the merged outputs.
DEFAULT_VIDEOS_DIR = DEFAULT_BASE_DIR / "videos"
DEFAULT_AUDIOS_DIR = DEFAULT_BASE_DIR / "audios"
DEFAULT_OUTPUT_DIR = DEFAULT_BASE_DIR / "merged"

# Module-level logger; handlers/format are set up by configure_logging().
LOGGER = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def shot_number(path: Path) -> int:
    """Extract the numeric shot index from an ``output_<n>.mp4`` filename.

    Returns -1 when the name does not match, so unmatched files sort first.
    """
    found = re.search(r"output_(\d+)\.mp4$", path.name)
    if found is None:
        return -1
    return int(found.group(1))
|
||||
|
||||
|
||||
def parse_args(argv: list[str] | None = None) -> argparse.Namespace:
    """Parse command-line options for the audio/video merge step.

    Args:
        argv: Optional explicit argument list (useful for tests). ``None``
            — the default, preserving the original call signature — parses
            ``sys.argv[1:]``.

    Returns:
        Namespace with ``videos_dir``, ``audios_dir``, ``output_dir``
        (all ``Path``) and ``log_level`` attributes.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("--videos-dir", type=Path, default=DEFAULT_VIDEOS_DIR)
    parser.add_argument("--audios-dir", type=Path, default=DEFAULT_AUDIOS_DIR)
    parser.add_argument("--output-dir", type=Path, default=DEFAULT_OUTPUT_DIR)
    parser.add_argument("--log-level", default="INFO")
    return parser.parse_args(argv)
|
||||
|
||||
|
||||
def main() -> int:
    """Merge each videos/output_n.mp4 with its audios/output_n.mp3 track.

    Already-merged shots and shots without audio are skipped. Returns 0 on
    success, 1 when the videos directory holds no shots.
    """
    opts = parse_args()
    configure_logging(opts.log_level)
    opts.output_dir.mkdir(parents=True, exist_ok=True)

    shots = sorted(opts.videos_dir.glob("output_*.mp4"), key=shot_number)
    if not shots:
        LOGGER.warning("No videos found in %s", opts.videos_dir)
        return 1

    for clip in shots:
        index = shot_number(clip)
        track = opts.audios_dir / f"output_{index}.mp3"
        target = opts.output_dir / f"merged_{index}.mp4"

        # Skip shots that are missing their narration or already merged.
        if not track.exists():
            LOGGER.warning("No audio found for shot %s (%s); skipped", index, track)
            continue
        if target.exists():
            LOGGER.info("Already exists; skipped shot %s", index)
            continue

        LOGGER.info("Merging shot %s: %s + %s -> %s", index, clip, track, target)
        # Copy the video stream untouched, re-encode audio to AAC, and cut
        # the result at the shorter of the two inputs.
        ffmpeg_cmd = [
            "ffmpeg",
            "-i",
            str(clip),
            "-i",
            str(track),
            "-c:v",
            "copy",
            "-c:a",
            "aac",
            "-shortest",
            "-y",
            str(target),
        ]
        subprocess.run(ffmpeg_cmd, check=True)
        LOGGER.info("Done: %s", target)

    return 0
|
||||
|
||||
|
||||
# Script entry point: exit the process with main()'s integer status code.
if __name__ == "__main__":
    raise SystemExit(main())
|
||||
163
src/scripts/run_video_pipeline.py
Normal file
163
src/scripts/run_video_pipeline.py
Normal file
@@ -0,0 +1,163 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Run the full video pipeline: generate, merge, and concatenate."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import logging
|
||||
import os
|
||||
import subprocess
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
from logging_config import configure_logging
|
||||
from s3_video_storage import S3VideoStorage
|
||||
|
||||
|
||||
# This file lives in src/scripts/, so parents[1] is two levels up — the
# directory containing src/, treated as the project root.
SCRIPT_DIR = Path(__file__).resolve().parent
PROJECT_ROOT = SCRIPT_DIR.parents[1]
DEFAULT_BASE_DIR = PROJECT_ROOT
# Checkout containing the video model code invoked by the generate step.
DEFAULT_HUNYUAN_DIR = DEFAULT_BASE_DIR / "HunyuanVideo-1.5"
# Pipeline input script plus the intermediate per-shot folders.
DEFAULT_REEL_SCRIPT = DEFAULT_BASE_DIR / "reel_script.json"
DEFAULT_IMAGES_DIR = DEFAULT_BASE_DIR / "images"
DEFAULT_VIDEOS_DIR = DEFAULT_BASE_DIR / "videos"
DEFAULT_AUDIOS_DIR = DEFAULT_BASE_DIR / "audios"
DEFAULT_MERGED_DIR = DEFAULT_BASE_DIR / "merged"
# Final concatenated output of the whole pipeline.
DEFAULT_OUTPUT = DEFAULT_BASE_DIR / "results" / "final_output.mp4"

# Module-level logger; handlers/format are set up by configure_logging().
LOGGER = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def parse_args(argv: list[str] | None = None) -> argparse.Namespace:
    """Parse command-line options for the full video pipeline.

    Args:
        argv: Optional explicit argument list (useful for tests). ``None``
            — the default, preserving the original call signature — parses
            ``sys.argv[1:]``.

    Returns:
        Namespace with the directory/path overrides (all ``Path``), the
        generation ``seed``, the per-stage ``skip_*`` flags, and
        ``log_level``.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("--base-dir", type=Path, default=DEFAULT_BASE_DIR)
    parser.add_argument("--hunyuan-dir", type=Path, default=DEFAULT_HUNYUAN_DIR)
    parser.add_argument("--reel-script", type=Path, default=DEFAULT_REEL_SCRIPT)
    parser.add_argument("--images-dir", type=Path, default=DEFAULT_IMAGES_DIR)
    parser.add_argument("--videos-dir", type=Path, default=DEFAULT_VIDEOS_DIR)
    parser.add_argument("--audios-dir", type=Path, default=DEFAULT_AUDIOS_DIR)
    parser.add_argument("--merged-dir", type=Path, default=DEFAULT_MERGED_DIR)
    parser.add_argument("--output", type=Path, default=DEFAULT_OUTPUT)
    parser.add_argument("--seed", type=int, default=1)
    parser.add_argument("--skip-generate", action="store_true")
    parser.add_argument("--skip-merge", action="store_true")
    parser.add_argument("--skip-concat", action="store_true")
    parser.add_argument("--skip-s3-upload", action="store_true")
    parser.add_argument("--log-level", default="INFO")
    return parser.parse_args(argv)
|
||||
|
||||
|
||||
def run_step(name: str, cmd: list[str]) -> None:
    """Log a stage banner and the command line, then execute the command.

    Raises:
        subprocess.CalledProcessError: when the command exits non-zero
            (``check=True`` is passed through to ``subprocess.run``).
    """
    LOGGER.info("=== %s ===", name)
    pretty = " ".join(str(part) for part in cmd)
    LOGGER.info("$ %s", pretty)
    subprocess.run(cmd, check=True)
|
||||
|
||||
|
||||
def maybe_upload_to_s3(output_path: Path) -> None:
    """Upload *output_path* through S3VideoStorage when AWS_S3_BUCKET is set.

    Connection details come from the standard AWS environment variables;
    any that are unset are forwarded as ``None``. When the bucket variable
    is missing the upload is skipped with a warning.
    """
    bucket = os.getenv("AWS_S3_BUCKET")
    if not bucket:
        LOGGER.warning("Skipping S3 upload: AWS_S3_BUCKET is not set")
        return

    storage_config = {
        "bucket_name": bucket,
        "region_name": os.getenv("AWS_REGION"),
        "endpoint_url": os.getenv("AWS_S3_ENDPOINT_URL"),
        "aws_access_key_id": os.getenv("AWS_ACCESS_KEY_ID"),
        "aws_secret_access_key": os.getenv("AWS_SECRET_ACCESS_KEY"),
        "aws_session_token": os.getenv("AWS_SESSION_TOKEN"),
    }
    uploaded_uri = S3VideoStorage(storage_config).store_file(output_path)
    LOGGER.info("Uploaded output to %s", uploaded_uri)
|
||||
|
||||
|
||||
def main() -> int:
    """Drive the generate -> merge -> concat pipeline, then optionally upload.

    Returns 0 on success, the failing command's exit code when a stage
    fails, or 1 when the S3 upload raises.
    """
    args = parse_args()
    configure_logging(args.log_level)

    # If only base-dir is overridden, derive the common subpaths from it.
    if args.base_dir != DEFAULT_BASE_DIR:
        derived = (
            ("hunyuan_dir", DEFAULT_HUNYUAN_DIR, Path("HunyuanVideo-1.5")),
            ("reel_script", DEFAULT_REEL_SCRIPT, Path("reel_script.json")),
            ("images_dir", DEFAULT_IMAGES_DIR, Path("images")),
            ("videos_dir", DEFAULT_VIDEOS_DIR, Path("videos")),
            ("audios_dir", DEFAULT_AUDIOS_DIR, Path("audios")),
            ("merged_dir", DEFAULT_MERGED_DIR, Path("merged")),
            ("output", DEFAULT_OUTPUT, Path("results") / "final_output.mp4"),
        )
        # Only rebase paths the user left at their defaults; explicit
        # overrides win over --base-dir.
        for attr, default, relative in derived:
            if getattr(args, attr) == default:
                setattr(args, attr, args.base_dir / relative)

    try:
        if not args.skip_generate:
            generate_cmd = [
                sys.executable,
                str(SCRIPT_DIR / "generate_videos.py"),
                "--hunyuan-dir",
                str(args.hunyuan_dir),
                "--reel-script",
                str(args.reel_script),
                "--images-dir",
                str(args.images_dir),
                "--videos-dir",
                str(args.videos_dir),
                "--audios-dir",
                str(args.audios_dir),
                "--seed",
                str(args.seed),
            ]
            run_step("Generate Videos", generate_cmd)

        if not args.skip_merge:
            merge_cmd = [
                sys.executable,
                str(SCRIPT_DIR / "merge_audio_video.py"),
                "--videos-dir",
                str(args.videos_dir),
                "--audios-dir",
                str(args.audios_dir),
                "--output-dir",
                str(args.merged_dir),
            ]
            run_step("Merge Audio + Video", merge_cmd)

        if not args.skip_concat:
            concat_cmd = [
                sys.executable,
                str(SCRIPT_DIR / "concat_merged.py"),
                "--merged-dir",
                str(args.merged_dir),
                "--output",
                str(args.output),
            ]
            run_step("Concatenate Merged Videos", concat_cmd)
    except subprocess.CalledProcessError as exc:
        LOGGER.exception("Pipeline failed at command: %s", exc.cmd)
        return exc.returncode

    if not args.skip_s3_upload:
        try:
            maybe_upload_to_s3(args.output)
        except Exception:
            # Best-effort upload: report the failure but keep local outputs.
            LOGGER.exception("Failed uploading output to S3")
            return 1

    LOGGER.info("Pipeline complete")
    LOGGER.info("Final output: %s", args.output)
    return 0
|
||||
|
||||
|
||||
# Script entry point: exit the process with main()'s integer status code.
if __name__ == "__main__":
    raise SystemExit(main())
|
||||
Reference in New Issue
Block a user