#!/bin/bash
#
# Generates one video per shot listed in a reel script JSON file by calling
# HunyuanVideo-1.5's generate.py with the shot's image and prompt.
# Origin: forked from LiveCarta/ContentGeneration.
#
# video length is derived from actual audio file duration
#
# Every location below may be overridden from the environment; when a
# variable is unset or empty the original hard-coded default is used.
#
# NOTE: `set -e` is not enabled, so a failure while processing one shot does
# not stop the remaining shots from being processed.

PROJECT_DIR="${PROJECT_DIR:-/home/madina/projects/short_videos}"

HUNYUAN_DIR="${HUNYUAN_DIR:-$PROJECT_DIR/HunyuanVideo-1.5}"  # checkout containing generate.py
REEL_SCRIPT="${REEL_SCRIPT:-$PROJECT_DIR/reel_script.json}"  # JSON file with a "shots" list
IMAGES_DIR="${IMAGES_DIR:-$PROJECT_DIR/images}"              # input:  shot_<n>.png
VIDEOS_DIR="${VIDEOS_DIR:-$PROJECT_DIR/videos}"              # output: output_<n>.mp4
AUDIOS_DIR="${AUDIOS_DIR:-$PROJECT_DIR/audios}"              # input:  output_<n>.mp3
MODEL_PATH="${MODEL_PATH:-$HUNYUAN_DIR/ckpts}"               # passed as --model_path

mkdir -p -- "$VIDEOS_DIR"

# Allocator settings picked up by PyTorch in every python/torchrun child.
export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True,max_split_size_mb:128

# write shots to a temp TSV file (tab-separated: shot_number, prompt)
TMPFILE=$(mktemp /tmp/shots_XXXXXX.tsv) || {
  echo "ERROR: could not create a temporary file for the shot list" >&2
  exit 1
}
# Remove the temp file whenever the script exits, including the early exit
# below.
trap 'rm -f -- "$TMPFILE"' EXIT

# The JSON path is handed to Python as an argument and the heredoc delimiter
# is quoted, so nothing from the shell is spliced into the Python source.
# Tabs and newlines inside a description are replaced by spaces so that each
# shot occupies exactly one two-column line.
if ! python3 - "$REEL_SCRIPT" <<'EOF' > "$TMPFILE"
import json
import sys

with open(sys.argv[1]) as fh:
    d = json.load(fh)

for shot in d['shots']:
    num = shot['shot_number']
    desc = shot['image_description'].replace('\t', ' ').replace('\n', ' ')
    print(f'{num}\t{desc}')
EOF
then
  echo "ERROR: could not read shots from $REEL_SCRIPT" >&2
  exit 1
fi

NUM_SHOTS=$(wc -l < "$TMPFILE")
echo "Found $NUM_SHOTS shots to generate"

# Convert an audio duration into the frame count passed to generate.py via
# --video_length.
#   $1 - duration in seconds (non-negative decimal number)
# Prints int(duration * 24) + 1, raised to the next odd number when even,
# then clamped to the range 49..169.
# NOTE(review): 24 is presumably the output frame rate and 49/169 the length
# limits accepted by the model - confirm against generate.py.
frames_for_duration() {
  LC_ALL=C awk -v d="$1" 'BEGIN {
    frames = int(d * 24) + 1
    if (frames % 2 == 0) frames++
    if (frames < 49) frames = 49
    if (frames > 169) frames = 169
    print frames
  }'
}

# Plain decimal number, e.g. "5" or "5.123000".
duration_re='^[0-9]+([.][0-9]+)?$'

# Generate one video per line of the shot list. The list is read on file
# descriptor 3 instead of stdin so that the commands run inside the loop
# (ffprobe, python3, torchrun) cannot consume the remaining lines.
while IFS=$'\t' read -r -u 3 shot_number prompt; do
  # Ignore blank lines.
  [[ -n "$shot_number" ]] || continue

  IMAGE_PATH="$IMAGES_DIR/shot_${shot_number}.png"
  OUTPUT_PATH="$VIDEOS_DIR/output_${shot_number}.mp4"
  AUDIO_PATH="$AUDIOS_DIR/output_${shot_number}.mp3"

  # get audio duration and convert to frame count
  if [[ ! -f "$AUDIO_PATH" ]]; then
    echo "WARNING: No audio found at $AUDIO_PATH, falling back to 5s default."
    DURATION=5.0
  else
    DURATION=$(ffprobe -v error -show_entries format=duration \
      -of default=noprint_wrappers=1:nokey=1 "$AUDIO_PATH")
    if [[ "$DURATION" =~ $duration_re ]]; then
      echo "Audio duration for shot $shot_number: ${DURATION}s"
    else
      # ffprobe failed or printed something that is not a number; use the
      # same default as for a missing audio file rather than passing an
      # empty --video_length to generate.py.
      echo "WARNING: Could not read duration of $AUDIO_PATH, falling back to 5s default."
      DURATION=5.0
    fi
  fi

  VIDEO_LENGTH=$(frames_for_duration "$DURATION")

  echo ""
  echo "Shot $shot_number | ${DURATION}s -> ${VIDEO_LENGTH} frames"
  echo "Prompt: $prompt"
  echo "Image: $IMAGE_PATH"
  echo "Audio: $AUDIO_PATH"
  echo "Output: $OUTPUT_PATH"

  # Already rendered on a previous run: nothing to do.
  if [[ -f "$OUTPUT_PATH" ]]; then
    echo "OUTPUT_PATH already exists, can skip"
    continue
  fi

  if [[ ! -f "$IMAGE_PATH" ]]; then
    echo "WARNING: image not found at $IMAGE_PATH, skipped"
    continue
  fi

  # NOTE(review): this runs in its own short-lived Python process, and
  # torch.cuda.empty_cache() releases memory cached by the calling process,
  # so it probably frees nothing for the torchrun below - confirm whether it
  # is still needed.
  python3 -c "import torch; torch.cuda.empty_cache(); print('GPU cache cleared')"

  # generate.py is started from inside the HunyuanVideo checkout. The
  # subshell keeps the directory change from leaking into later iterations,
  # and torchrun is not started at all if the cd fails.
  if (
    cd "$HUNYUAN_DIR" &&
      torchrun --nproc_per_node=1 generate.py \
        --prompt "$prompt" \
        --image_path "$IMAGE_PATH" \
        --resolution 480p \
        --aspect_ratio 16:9 \
        --seed 1 \
        --video_length "$VIDEO_LENGTH" \
        --rewrite false \
        --cfg_distilled true \
        --enable_step_distill true \
        --sparse_attn false \
        --use_sageattn true \
        --enable_cache false \
        --overlap_group_offloading true \
        --sr false \
        --output_path "$OUTPUT_PATH" \
        --model_path "$MODEL_PATH"
  ); then
    echo "shot $shot_number done"
  else
    # Keep going: one failed shot must not stop the rest of the batch.
    echo "ERROR: generation failed for shot $shot_number" >&2
  fi

done 3< "$TMPFILE"

# Delete the temporary shot list (skipped if the variable was never set) and
# report completion.
if [[ -n "${TMPFILE:-}" ]]; then
  rm -f -- "$TMPFILE"
fi

echo "Done"