commit de1bb5c23f8789a906ff82bb80237c08b7b6d7bb Author: Madina Date: Wed Apr 1 04:36:27 2026 -0700 Video generation pipelines files added diff --git a/.env b/.env new file mode 100644 index 0000000..f22ab82 --- /dev/null +++ b/.env @@ -0,0 +1 @@ +ELEVENLABS_API_KEY=<REDACTED-LEAKED-KEY-ROTATE-IMMEDIATELY> diff --git a/HunyuanVideo-1.5 b/HunyuanVideo-1.5 new file mode 160000 index 0000000..2641c0d --- /dev/null +++ b/HunyuanVideo-1.5 @@ -0,0 +1 @@ +Subproject commit 2641c0de73da0a2d9682fed24af2c0a516527cc1 diff --git a/concat_merged.sh b/concat_merged.sh new file mode 100644 index 0000000..39163c1 --- /dev/null +++ b/concat_merged.sh @@ -0,0 +1,22 @@ +#!/bin/bash + +# Concatenates all merged/merged_n.mp4 into final_output.mp4 + +BASE_DIR="/home/madina/projects/short_videos" +MERGED_DIR="$BASE_DIR/merged" +OUTPUT="$BASE_DIR/results/run_3/final_output.mp4" +FILELIST=$(mktemp /tmp/filelist_XXXXXX.txt) + +# Build file list sorted by shot number +for video in $(ls "$MERGED_DIR"/merged_*.mp4 | sort -t_ -k2 -n); do + echo "file '$video'" >> "$FILELIST" +done + +echo "Concatenating the following files:" +cat "$FILELIST" + +ffmpeg -f concat -safe 0 -i "$FILELIST" -c copy -y "$OUTPUT" + +rm -f "$FILELIST" +echo "" +echo "Done" diff --git a/generate_audios.py b/generate_audios.py new file mode 100644 index 0000000..dc88555 --- /dev/null +++ b/generate_audios.py @@ -0,0 +1,35 @@ +from elevenlabs.client import ElevenLabs +from elevenlabs.play import play +import os +import json +from dotenv import load_dotenv + +load_dotenv() +ELEVENLABS_API_KEY = os.getenv('ELEVENLABS_API_KEY') + + +if __name__ == '__main__': + + script_path = "reel_script.json" + with open(script_path, "r") as f: + reel_data = json.load(f) + 
    client = ElevenLabs( + api_key=ELEVENLABS_API_KEY + ) + for shot in reel_data["shots"]: + print(shot["shot_number"], shot["voiceover"]) + prompt = shot["voiceover"] + audio = client.text_to_speech.convert( + text=prompt, + voice_id="JBFqnCBsd6RMkjVDRZzb", + 
model_id="eleven_multilingual_v2", + output_format="mp3_44100_128", + ) + + audio_bytes = b"".join(audio) + + if not os.path.exists("audios"): + os.makedirs("audios") + with open(f"audios/output_{shot["shot_number"]}.mp3", "wb") as f: + f.write(audio_bytes) \ No newline at end of file diff --git a/generate_images.py b/generate_images.py new file mode 100644 index 0000000..446145b --- /dev/null +++ b/generate_images.py @@ -0,0 +1,28 @@ +import torch +from diffusers import FluxPipeline +import json +import os + +if __name__ == '__main__': + + script_path = "reel_script.json" + with open(script_path, "r") as f: + reel_data = json.load(f) + + pipe = FluxPipeline.from_pretrained("black-forest-labs/FLUX.1-schnell", torch_dtype=torch.bfloat16) + pipe.enable_model_cpu_offload() + + for shot in reel_data["shots"]: + print(shot["shot_number"], shot["image_description"]) + prompt = shot["image_description"] + image = pipe( + prompt, + guidance_scale=0.0, + num_inference_steps=4, + max_sequence_length=256, + generator=torch.Generator("cpu").manual_seed(0) + ).images[0] + + if not os.path.exists("images"): + os.makedirs("images") + image.save(f"images/shot_{shot["shot_number"]}.png") \ No newline at end of file diff --git a/generate_script.py b/generate_script.py new file mode 100644 index 0000000..410b8be --- /dev/null +++ b/generate_script.py @@ -0,0 +1,344 @@ +import torch +import json +from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig +import re +from typing import Optional + +device = 'cuda' if torch.cuda.is_available() else 'cpu' +MODEL_ID = "Qwen/Qwen3-14B" +WORDS_PER_SECOND = 2.5 +MAX_DEAD_AIR_SECONDS = 1 +MAX_VOICEOVER_SECONDS = 5.0 +MAX_VOICEOVER_WORDS = int(MAX_VOICEOVER_SECONDS * WORDS_PER_SECOND) +MIN_VOICEOVER_WORDS = 5 + + +def load_model(model_id: str = MODEL_ID): + tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True) + bnb_config = BitsAndBytesConfig( + load_in_4bit=True, + 
bnb_4bit_compute_dtype=torch.float16, + bnb_4bit_use_double_quant=True, + bnb_4bit_quant_type="nf4", + ) + model = AutoModelForCausalLM.from_pretrained( + model_id, + quantization_config=bnb_config, + device_map="auto", + trust_remote_code=True, + ).eval() + + return model, tokenizer + + +def generate_reel_scenario( + model, + tokenizer, + content_summary: str, + temperature: float = 0.75, + top_p: float = 0.9, + repetition_penalty: float = 1.1, +) -> str: + """ + Generate a shot-by-shot Instagram Reel scenario where every script beat + has its own image description for AI image/video generation. + + Each shot in the output contains: + - Timestamp + - Beat label (HOOK, PROBLEM, etc.) + - Voiceover line + - Text on screen + - Image description (AI-generation-ready prompt) + + The hook question from the topic summary is preserved as the opening beat. + """ + + system_prompt = ( + "You are a professional Instagram Reel director and scriptwriter specializing in EdTech content. " + "You think like a filmmaker: every line of voiceover has a visual that amplifies it. " + "You write punchy, cinematic, scroll-stopping reels that feel native to social media. " + "You write image descriptions like a cinematographer briefing an AI image generator — " + "specific, vivid, atmospheric, with clear subject, composition, lighting, mood, and style." + ) + + user_prompt = f"""You are given a topic summary for an Instagram Reel. + Your job is to direct a complete shot-by-shot reel scenario — like a real filmmaker laying out a storyboard. + + ## TOPIC SUMMARY + {content_summary} + + --- + + ## PACING RULES — read these before writing a single shot + + These rules exist because every shot in this reel will be rendered as a real video clip. Timestamps must be tight and honest. + + **Speech rate:** spoken voiceover moves at roughly 2.5 words per second in a reel (energetic, not rushed). + + **VOICEOVER LENGTH: vary naturally between 2–5 seconds per shot. 
Diversity is encouraged.** + Different beats call for different rhythms: + - A hook or payoff line can be short and punchy: 2–3 seconds (5–8 words). Let it land. + - A problem or tension beat needs more breath: 4–5 seconds (10–12 words). Build the feeling. + - A CTA can be medium: 3–4 seconds (8–10 words). Direct and warm. + + The one hard constraint is the 5-second ceiling — the audio renderer cannot handle more than 5 seconds per shot (12 words). Never exceed this. If an idea needs more than 12 words, split it into two shots. + + VARIETY IN ACTION — a good reel sounds like this: + SHOT 1 (HOOK, 2s): "Will your major still matter in five years?" — short, punchy, stops the scroll + SHOT 2 (PROBLEM, 5s): "AI is already replacing writers, designers, and even doctors — fields we thought were safe." — builds tension + SHOT 3 (TENSION, 4s): "Most students have no idea this is already happening to them." — personal, lands hard + SHOT 4 (PAYOFF, 3s): "Will your major still matter? It depends on you." — echo + flip + SHOT 5 (SOLUTION, 5s): "The students who'll thrive are learning skills AI simply cannot replicate yet." — concrete, hopeful + SHOT 6 (CTA, 3s): "Follow us for tips on future-proofing your career." — warm, direct + + Notice how the lengths vary. That variation is intentional — it creates rhythm and keeps the viewer engaged. + + **Shot duration = voiceover duration + 0–1 second of breathing room.** Match the shot length to what you actually wrote, not to a preset slot. + + **Continuity.** The end timestamp of one shot is the start timestamp of the next. No gaps, no overlaps. The reel runs like a stopwatch. + + **Total reel length:** 45–60 seconds. Count it up before you finalize. If you're over, trim voiceover. If you're under, add a shot. + + **Shot count:** aim for 7–10 shots. More shots = faster pace = more energy. Fewer shots = slower, more contemplative. Match the tone of the topic. 
+ + --- + + ## BRAND CONTEXT — read before writing any shot + + These reels are published on the **LiveCarta** Instagram account. LiveCarta is an AI-powered EdTech platform for higher education that lets educators build custom coursepacks by mixing and matching chapters from top publishers, adding their own materials, video, and notes — all in one place. Students get flexible, affordable access: buy whole books or individual chapters, read online or via the app. Key features: AI Curriculum Builder, Remix-on-Demand, pay-by-chapter pricing, LMS integration (Canvas, Blackboard), and real-time content updates. + + **Brand mention rules:** + + 1. **CTA beat — always name LiveCarta.** The final CTA shot(s) must mention LiveCarta by name. Reference it naturally in context — e.g. "Follow LiveCarta for more", "Check out LiveCarta", "LiveCarta helps students stay ahead." + + 2. **Mid-reel mention — only when there's a genuine, obvious overlap.** If the topic directly connects to what LiveCarta does, drop a natural product mention in the SOLUTION beat. Do NOT force it. The overlap is genuine when the topic is about: customizing course content, affordable textbooks, AI in education, flexible learning, keeping up with fast-changing fields, building skills alongside a degree. If the overlap isn't obvious, skip the mid-reel mention entirely — a forced plug feels fake and loses the audience. + + 3. **Never open with the brand.** The HOOK and PROBLEM beats must earn attention first. LiveCarta enters only once the viewer is already hooked. + + Write shots that follow these beats in order. Each beat can be one or more shots — distribute them based on how long the idea takes to say out loud, not on a fixed slot. + + HOOK — The hook question from the topic, spoken word-for-word. One punchy sentence. Creates immediate tension or curiosity. + PROBLEM — Build the relatable pain. Show the viewer their own situation. Make it vivid and specific, not abstract. 
+ TENSION / RELATE — Go deeper into the problem. A concrete, personal detail that makes the viewer think "that's exactly me." + HOOK PAYOFF — Echo the opening hook question word-for-word, then immediately flip it with the insight. This is the emotional turning point. + SOLUTION — The concrete, actionable answer. Specific enough to be immediately useful. + CTA — Direct call to action. **Must name LiveCarta by name.** End with a follow or save prompt. Warm, not salesy. + + ## OUTPUT FORMAT + + Output exactly two sections. Start immediately with the first separator — no preamble. + + ------------------------------------------------------------ + SHOT LIST + For each shot use EXACTLY this format: + + SHOT [N] + Timestamp: [start]–[end] + Beat: [BEAT LABEL] + Voiceover: [2–5 seconds of speech (5–12 words). Vary length by beat — short for hooks/payoffs, longer for problem/tension/solution. Hard ceiling: 12 words.] + Text on screen: [3–6 word punchy overlay that punches up the voiceover, not just repeats it] + Image description: [Standalone AI image generation prompt. Describe: subject, composition, camera angle, lighting, color palette, mood, visual style. Make it cinematic and specific. 2–4 sentences. No references to "the reel", "the previous shot", or any other shot.] + + ------------------------------------------------------------ + CAPTION + + Write a 4–5 line Instagram caption: + Line 1: hook statement (echoes the reel's opening question) + Lines 2–3: expand the insight in 1–2 casual, direct sentences + Line 4: engagement question to the audience + Line 5: 8–10 relevant hashtags + + --- + + ## FINAL CHECKS before outputting + - Does the voiceover length vary across shots? Hooks and payoffs should be short (2–3s), problems and solutions longer (4–5s). + - Does any voiceover exceed 12 words? If so, split that shot. + - Does every shot's timestamp connect cleanly to the next? + - Does the total add up to 45–60 seconds? + - Does the CTA beat name LiveCarta explicitly? 
+ - If the topic overlaps with LiveCarta's features, is there a natural mid-reel mention in the SOLUTION beat? + - Does the HOOK PAYOFF echo it verbatim before the flip? + - Is every Image description usable as a standalone AI generation prompt? + """ + + messages = [ + {"role": "system", "content": system_prompt}, + {"role": "user", "content": user_prompt}, + ] + + text = tokenizer.apply_chat_template( + messages, + tokenize=False, + add_generation_prompt=True, + ) + + inputs = tokenizer(text, return_tensors="pt").to(model.device) + + print("Generating reel scenario..") + with torch.no_grad(): + output_ids = model.generate( + **inputs, + max_new_tokens=2000, + temperature=temperature, + top_p=top_p, + repetition_penalty=repetition_penalty, + do_sample=True, + pad_token_id=tokenizer.eos_token_id, + ) + + generated_ids = output_ids[0][inputs["input_ids"].shape[1]:] + result = tokenizer.decode(generated_ids, skip_special_tokens=True) + return result + + +def _parse_timestamp_seconds(ts_str: str) -> tuple[Optional[int], Optional[int]]: + ts_str = re.sub(r'\s*[–—]+\s*', '-', ts_str).strip() + + parts = ts_str.split('-') + if len(parts) != 2: + return None, None + + def to_seconds(s: str) -> Optional[int]: + s = s.strip() + if ':' in s: + m, sec = s.split(':', 1) + try: + return int(m) * 60 + int(sec) + except ValueError: + return None + try: + return int(s) + except ValueError: + return None + + return to_seconds(parts[0]), to_seconds(parts[1]) + + +def extract_field(label: str, next_label: Optional[str], text: str) -> str: + if next_label: + pattern = rf'{label}:\s*(.*?)(?=\n{next_label}:|$)' + else: + pattern = rf'{label}:\s*(.*?)$' + m = re.search(pattern, text, re.DOTALL | re.IGNORECASE) + if m: + return m.group(1).strip().strip('"') + return "" + + +def parse_reel_scenario(raw_scenario: str) -> dict: + """ + Parse the shot-by-shot reel scenario into a structured dict. 
+ + Returns: + { + "shots": [ + { + "shot_number": 1, + "timestamp": "0–4", + "start_sec": 0, + "end_sec": 4, + "duration_sec": 4, + "beat": "HOOK", + "voiceover": "Will your major still matter in 5 years?", + "word_count": 9, + "speech_duration_sec": 3.6, # word_count / WORDS_PER_SECOND + "dead_air_sec": 0.4, # duration_sec - speech_duration_sec + "text_on_screen": "Your degree. Obsolete?", + "image_description": "Close-up of a university diploma ...", + }, + ... + ], + "caption": { + "body": "Will your major still matter in 5 years? ...", + "hashtags": ["#EdTech", "#AIEducation", ...], + }, + "total_duration_sec": 55, + + } + """ + result = { + "shots": [], + "caption": {"body": "", "hashtags": []}, + "total_duration_sec": 0, + } + raw_scenario = re.sub(r'.*?', '', raw_scenario, flags=re.DOTALL).strip() + cleaned = re.sub(r'\*+', '', raw_scenario) + + shot_section = re.search( + r'SHOT LIST\s*\n(.*?)(?=-{4,}\s*CAPTION|$)', + cleaned, re.DOTALL | re.IGNORECASE + ) + if shot_section: + shot_text = shot_section.group(1) + shot_blocks = re.split(r'\n(?=SHOT\s+\d+)', shot_text.strip()) + + for block in shot_blocks: + block = block.strip() + if not block: + continue + + shot_num_match = re.match(r'SHOT\s+(\d+)', block) + if not shot_num_match: + continue + shot_number = int(shot_num_match.group(1)) + + timestamp = extract_field("Timestamp", "Beat", block) + beat = extract_field("Beat", "Voiceover", block) + voiceover = extract_field("Voiceover", "Text on screen", block) + text_on_screen = extract_field("Text on screen", "Image description", block) + image_description = extract_field("Image description", None, block) + + timestamp_display = re.sub(r'\s*[–—-]+\s*', '–', timestamp) + + start_sec, end_sec = _parse_timestamp_seconds(timestamp) + duration_sec = (end_sec - start_sec) if (start_sec is not None and end_sec is not None) else None + word_count = len(voiceover.split()) if voiceover else 0 + speech_duration = round(word_count / WORDS_PER_SECOND, 1) + dead_air = 
round(duration_sec - speech_duration, 1) if duration_sec is not None else None + + result["shots"].append({ + "shot_number": shot_number, + "timestamp": timestamp_display, + "start_sec": start_sec, + "end_sec": end_sec, + "duration_sec": duration_sec, + "beat": beat.upper(), + "voiceover": voiceover, + "word_count": word_count, + "speech_duration_sec": speech_duration, + "dead_air_sec": dead_air, + "text_on_screen": text_on_screen, + "image_description": image_description, + }) + + caption_section = re.search( + r'CAPTION\s*\n(.*?)$', + cleaned, re.DOTALL | re.IGNORECASE + ) + + if caption_section: + caption_text = caption_section.group(1).strip() + lines = [l.strip() for l in caption_text.splitlines() if l.strip()] + hashtag_line = next((l for l in lines if l.startswith("#")), "") + body_lines = [l for l in lines if not l.startswith("#")] + result["caption"] = { + "body": "\n".join(body_lines).strip().strip('"'), + "hashtags": re.findall(r'#\w+', hashtag_line), + } + + return result + + +if __name__ == '__main__': + + with open("topic_description.txt", "r") as f: + topic = f.read() + + model, tokenizer = load_model() + scenario_raw = generate_reel_scenario(model, tokenizer, topic) + + parsed = parse_reel_scenario(scenario_raw) + + with open("reel_script.json", "w") as f: + json.dump(parsed, f) + \ No newline at end of file diff --git a/generate_videos.sh b/generate_videos.sh new file mode 100644 index 0000000..f929ad3 --- /dev/null +++ b/generate_videos.sh @@ -0,0 +1,98 @@ +#!/bin/bash + +# video length is derived from actual audio file duration + +HUNYUAN_DIR="/home/madina/projects/short_videos/HunyuanVideo-1.5" +REEL_SCRIPT="/home/madina/projects/short_videos/reel_script.json" +IMAGES_DIR="/home/madina/projects/short_videos/images" +VIDEOS_DIR="/home/madina/projects/short_videos/videos" +AUDIOS_DIR="/home/madina/projects/short_videos/audios" +MODEL_PATH="$HUNYUAN_DIR/ckpts" + +mkdir -p "$VIDEOS_DIR" + +export 
PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True,max_split_size_mb:128 + +# write shots to a temp TSV file (tab-separated: shot_number, prompt) +TMPFILE=$(mktemp /tmp/shots_XXXXXX.tsv) +python3 - < "$TMPFILE" +import json +d = json.load(open('$REEL_SCRIPT')) +for shot in d['shots']: + num = shot['shot_number'] + desc = shot['image_description'].replace('\t', ' ').replace('\n', ' ') + print(f'{num}\t{desc}') +EOF + +NUM_SHOTS=$(wc -l < "$TMPFILE") +echo "Found $NUM_SHOTS shots to generate" + +while IFS=$'\t' read -r shot_number prompt; do + IMAGE_PATH="$IMAGES_DIR/shot_${shot_number}.png" + OUTPUT_PATH="$VIDEOS_DIR/output_${shot_number}.mp4" + AUDIO_PATH="$AUDIOS_DIR/output_${shot_number}.mp3" + + # get audio duration and convert to frame count + if [ ! -f "$AUDIO_PATH" ]; then + echo "WARNING: No audio found at $AUDIO_PATH, falling back to 5s default." + DURATION=5.0 + else + DURATION=$(ffprobe -v error -show_entries format=duration \ + -of default=noprint_wrappers=1:nokey=1 "$AUDIO_PATH") + echo "Audio duration for shot $shot_number: ${DURATION}s" + fi + + VIDEO_LENGTH=$(python3 -c " +duration = float('$DURATION') +frames = int(duration * 24) + 1 +if frames % 2 == 0: + frames += 1 +frames = max(49, min(frames, 169)) +print(frames) +") + + echo "" + echo "Shot $shot_number | ${DURATION}s -> ${VIDEO_LENGTH} frames" + echo "Prompt: $prompt" + echo "Image: $IMAGE_PATH" + echo "Audio: $AUDIO_PATH" + echo "Output: $OUTPUT_PATH" + + if [ -f "$OUTPUT_PATH" ]; then + echo "OUTPUT_PATH already exists, can skip" + continue + fi + + if [ ! 
-f "$IMAGE_PATH" ]; then + echo "WARNING: image not found at $IMAGE_PATH, skipped" + continue + fi + + python3 -c "import torch; torch.cuda.empty_cache(); print('GPU cache cleared')" + + cd "$HUNYUAN_DIR" + torchrun --nproc_per_node=1 generate.py \ + --prompt "$prompt" \ + --image_path "$IMAGE_PATH" \ + --resolution 480p \ + --aspect_ratio 16:9 \ + --seed 1 \ + --video_length $VIDEO_LENGTH \ + --rewrite false \ + --cfg_distilled true \ + --enable_step_distill true \ + --sparse_attn false \ + --use_sageattn true \ + --enable_cache false \ + --overlap_group_offloading true \ + --sr false \ + --output_path "$OUTPUT_PATH" \ + --model_path "$MODEL_PATH" + + echo "shot $shot_number done" + +done < "$TMPFILE" + +rm -f "$TMPFILE" + +echo "Done" diff --git a/merge_audio_video.sh b/merge_audio_video.sh new file mode 100644 index 0000000..ccb87e1 --- /dev/null +++ b/merge_audio_video.sh @@ -0,0 +1,34 @@ +#!/bin/bash + +# merges videos/output_n.mp4 with audios/output_n.mp3 -> merged/merged_n.mp4 +BASE_DIR="/home/madina/projects/short_videos" +VIDEOS_DIR="$BASE_DIR/videos" +AUDIOS_DIR="$BASE_DIR/audios" +OUTPUT_DIR="$BASE_DIR/merged" + +mkdir -p "$OUTPUT_DIR" + +for video in "$VIDEOS_DIR"/output_*.mp4; do + num=$(basename "$video" | sed 's/output_\([0-9]*\)\.mp4/\1/') + audio="$AUDIOS_DIR/output_${num}.mp3" + output="$OUTPUT_DIR/merged_${num}.mp4" + + if [ ! -f "$audio" ]; then + echo "WARNING: No audio found for shot $num ($audio); skipped" + continue + fi + + if [ -f "$output" ]; then + echo "Already exists; skipped the shot $num." 
+ continue + fi + + echo "Merging shot $num: $video + $audio -> $output" + ffmpeg -i "$video" -i "$audio" \ + -c:v copy \ + -c:a aac \ + -shortest \ + -y "$output" + + echo "Done: $output" +done diff --git a/topic_description.txt b/topic_description.txt new file mode 100644 index 0000000..d307c88 --- /dev/null +++ b/topic_description.txt @@ -0,0 +1,6 @@ +**TITLE:** “I Only Use 3 Chapters Out Of This Book” – And That’s Okay +**CATEGORY:** Hot take / Controversial opinion +**TONE:** Empowering +**HOOK QUESTION:** Why pay for an entire book if you only read 10 pages? +**CONTENT SUMMARY:** Argue for customizable learning experiences through platforms like LiveCarta. +**TARGET AUDIENCE:** Students \ No newline at end of file