Video generation pipelines files added

2026-04-01 04:36:27 -07:00
commit de1bb5c23f
9 changed files with 569 additions and 0 deletions
--- a/generate_videos.sh
+++ b/generate_videos.sh
@@ -0,0 +1,98 @@
+#!/bin/bash
+
+# video length is derived from actual audio file duration
+
+HUNYUAN_DIR="/home/madina/projects/short_videos/HunyuanVideo-1.5"
+REEL_SCRIPT="/home/madina/projects/short_videos/reel_script.json"
+IMAGES_DIR="/home/madina/projects/short_videos/images"
+VIDEOS_DIR="/home/madina/projects/short_videos/videos"
+AUDIOS_DIR="/home/madina/projects/short_videos/audios"
+MODEL_PATH="$HUNYUAN_DIR/ckpts"
+
+mkdir -p "$VIDEOS_DIR"
+
+export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True,max_split_size_mb:128
+
+# write shots to a temp TSV file (tab-separated: shot_number, prompt)
+TMPFILE=$(mktemp /tmp/shots_XXXXXX.tsv)
+python3 - <<EOF > "$TMPFILE"
+import json
+d = json.load(open('$REEL_SCRIPT'))
+for shot in d['shots']:
+    num = shot['shot_number']
+    desc = shot['image_description'].replace('\t', ' ').replace('\n', ' ')
+    print(f'{num}\t{desc}')
+EOF
+
+NUM_SHOTS=$(wc -l < "$TMPFILE")
+echo "Found $NUM_SHOTS shots to generate"
+
+while IFS=$'\t' read -r shot_number prompt; do
+    IMAGE_PATH="$IMAGES_DIR/shot_${shot_number}.png"
+    OUTPUT_PATH="$VIDEOS_DIR/output_${shot_number}.mp4"
+    AUDIO_PATH="$AUDIOS_DIR/output_${shot_number}.mp3"
+
+    # get audio duration and convert to frame count
+    if [ ! -f "$AUDIO_PATH" ]; then
+        echo "WARNING: No audio found at $AUDIO_PATH, falling back to 5s default."
+        DURATION=5.0
+    else
+        DURATION=$(ffprobe -v error -show_entries format=duration \
+            -of default=noprint_wrappers=1:nokey=1 "$AUDIO_PATH")
+        echo "Audio duration for shot $shot_number: ${DURATION}s"
+    fi
+
+    VIDEO_LENGTH=$(python3 -c "
+duration = float('$DURATION')
+frames = int(duration * 24) + 1
+if frames % 2 == 0:
+    frames += 1
+frames = max(49, min(frames, 169))
+print(frames)
+")
+
+    echo ""
+    echo "Shot $shot_number | ${DURATION}s -> ${VIDEO_LENGTH} frames"
+    echo "Prompt: $prompt"
+    echo "Image:  $IMAGE_PATH"
+    echo "Audio:  $AUDIO_PATH"
+    echo "Output: $OUTPUT_PATH"
+
+    if [ -f "$OUTPUT_PATH" ]; then
+        echo "OUTPUT_PATH already exists, can skip"
+        continue
+    fi
+
+    if [ ! -f "$IMAGE_PATH" ]; then
+        echo "WARNING: image not found at $IMAGE_PATH, skipped"
+        continue
+    fi
+
+    python3 -c "import torch; torch.cuda.empty_cache(); print('GPU cache cleared')"
+
+    cd "$HUNYUAN_DIR"
+    torchrun --nproc_per_node=1 generate.py \
+        --prompt "$prompt" \
+        --image_path "$IMAGE_PATH" \
+        --resolution 480p \
+        --aspect_ratio 16:9 \
+        --seed 1 \
+        --video_length $VIDEO_LENGTH \
+        --rewrite false \
+        --cfg_distilled true \
+        --enable_step_distill true \
+        --sparse_attn false \
+        --use_sageattn true \
+        --enable_cache false \
+        --overlap_group_offloading true \
+        --sr false \
+        --output_path "$OUTPUT_PATH" \
+        --model_path "$MODEL_PATH"
+
+    echo "shot $shot_number done"
+
+done < "$TMPFILE"
+
+rm -f "$TMPFILE"
+
+echo "Done"