# Source code for cw.lib.storyboard

"""
Storyboard generation from TV spot scripts.

This module generates image prompts from TV spot script visual descriptions
and creates DiffusionJobs for storyboard frame generation.

Key Features:
    - Extracts visual elements from script descriptions
    - Optional LLM enhancement for more detailed prompts
    - Creates linked DiffusionJob records for each frame
    - Supports visual style prefixes for consistency

Classes:
    :class:`StoryboardGenerator`
        Generates image prompts from script rows with optional LLM enhancement

Functions:
    :func:`create_storyboard_jobs`
        Creates DiffusionJob and StoryboardImage records from prompts

Workflow:
    1. Extract visual elements from script row ``visual_text``
    2. Add visual style prefix and cinematic quality keywords
    3. Optionally enhance with LLM (HFPromptEnhancer)
    4. Create DiffusionJob records linked to StoryboardJob

Usage::

    from cw.lib.storyboard import StoryboardGenerator, create_storyboard_jobs

    # Generate prompts
    generator = StoryboardGenerator(use_llm=True)
    prompts = generator.generate_prompts_for_version(video_ad_unit)

    # Create DiffusionJobs
    jobs = create_storyboard_jobs(storyboard_job, prompts)

Note:
    Generated storyboard frames use 1280x720 (16:9) dimensions by default.
"""

import logging
import re
from typing import Optional

logger = logging.getLogger(__name__)


class StoryboardGenerator:
    """
    Generates storyboard image prompts from TV spot script rows.

    Can use either simple prompt building (combines the visual description
    with a style prefix and quality keywords) or LLM-enhanced prompts for
    more detailed generation.
    """
[docs] def __init__( self, use_llm: bool = True, model_id: str = "Qwen/Qwen2.5-3B-Instruct", device: Optional[str] = None, ): """ Initialize the storyboard generator. Args: use_llm: Whether to use LLM for enhanced prompt generation model_id: HuggingFace model ID for prompt enhancement device: Device to use ('cpu', 'mps', 'cuda', or None for auto-detect) """ self.use_llm = use_llm self.model_id = model_id self.device = device self._enhancer = None
def _get_enhancer(self): """Get or create the prompt enhancer.""" if self._enhancer is None and self.use_llm: logger.debug(f"Creating HFPromptEnhancer with model: {self.model_id}") from cw.lib.prompt_enhancer import HFPromptEnhancer self._enhancer = HFPromptEnhancer( model_id=self.model_id, style="cinematic", creativity=0.7, ) logger.debug("HFPromptEnhancer created") return self._enhancer
[docs] def generate_prompt( self, visual_text: str, audio_text: str, visual_style_prompt: str = "", enhance: bool = True, ) -> dict: """ Generate an image prompt from a script row. Args: visual_text: Visual description from script (shots, settings, actions) audio_text: Audio description (dialogue, VO) for context visual_style_prompt: Common style prefix for consistency enhance: Whether to use LLM enhancement Returns: Dict with 'prompt' and optionally 'negative_prompt' """ logger.debug(f"Generating prompt from visual_text: {visual_text[:80]}...") # Build base prompt from visual description # Extract key visual elements, ignoring timing/technical notes base_prompt = self._extract_visual_elements(visual_text) logger.debug(f"Extracted visual elements: {base_prompt[:80]}...") # Add style prefix if provided if visual_style_prompt: full_prompt = f"{visual_style_prompt}, {base_prompt}" logger.debug(f"Added style prefix: {visual_style_prompt[:50]}...") else: full_prompt = base_prompt # Add cinematic quality keywords quality_suffix = "cinematic still, professional photography, high quality, detailed" full_prompt = f"{full_prompt}, {quality_suffix}" result = { "prompt": full_prompt, "negative_prompt": "blurry, low quality, amateur, distorted, watermark, text overlay", "source_visual": visual_text, "source_audio": audio_text, } # Optionally enhance with LLM if enhance and self.use_llm: logger.debug("Attempting LLM enhancement") try: enhancer = self._get_enhancer() if enhancer: enhanced = enhancer.enhance_prompt(full_prompt) result["prompt"] = enhanced.get("enhanced_prompt", full_prompt) result["negative_prompt"] = enhanced.get( "negative_prompt", result["negative_prompt"] ) result["enhanced"] = True logger.debug( f"LLM enhancement successful, prompt length: {len(result['prompt'])}" ) except Exception as e: logger.warning(f"LLM enhancement failed, using base prompt: {e}") result["enhanced"] = False else: logger.debug("Skipping LLM enhancement (disabled)") result["enhanced"] = 
False return result
def _extract_visual_elements(self, visual_text: str) -> str: """ Extract key visual elements from script visual description. Removes timing notations, technical directions, and focuses on describable visual content. """ # Remove common technical prefixes text = visual_text # Remove timing references like "00:00:05:00" or "(5.0s)" import re text = re.sub(r"\d{2}:\d{2}:\d{2}:\d{2}", "", text) text = re.sub(r"\(\d+\.?\d*s\)", "", text) # Remove shot type prefixes (these are useful context but not for image gen) # Keep them but normalize - they add context shot_types = ["CU", "MCU", "MS", "WS", "ECU", "EWS", "MWS", "POV", "OTS"] for shot in shot_types: text = re.sub(rf"\b{shot}\b[:\s]*", f"{shot}: ", text, flags=re.IGNORECASE) # Clean up extra whitespace text = " ".join(text.split()) return text.strip()
[docs] def generate_prompts_for_version( self, video_ad_unit, enhance: bool = True, ) -> list[dict]: """ Generate prompts for all script rows in a video ad unit. Args: video_ad_unit: VideoAdUnit instance enhance: Whether to use LLM enhancement Returns: List of prompt dicts, one per script row """ logger.debug(f"Generating prompts for video ad unit: {video_ad_unit.code}") prompts = [] visual_style = video_ad_unit.visual_style_prompt or "" logger.debug(f"Visual style prompt: {visual_style[:50] if visual_style else '(none)'}...") script_rows = list(video_ad_unit.script_rows.all().order_by("order_index")) logger.debug(f"Processing {len(script_rows)} script rows") for idx, row in enumerate(script_rows): logger.debug( f"Generating prompt for row {idx + 1}/{len(script_rows)}: shot {row.shot_number}" ) prompt_data = self.generate_prompt( visual_text=row.visual_text, audio_text=row.audio_text, visual_style_prompt=visual_style, enhance=enhance, ) prompt_data["row_index"] = row.order_index prompt_data["shot_number"] = row.shot_number prompts.append(prompt_data) logger.info(f"Generated {len(prompts)} prompts for video ad unit {video_ad_unit.code}") return prompts
def create_storyboard_jobs(
    storyboard,
    prompts: list[dict],
) -> list:
    """
    Create DiffusionJobs and StoryboardImages for a storyboard.

    For every prompt, ``storyboard.images_per_row`` jobs are created; each
    job gets a Prompt record, a DiffusionJob (1280x720, 16:9) and a
    StoryboardImage linking it back to the script row.

    Args:
        storyboard: Storyboard instance
        prompts: List of prompt dicts from generate_prompts_for_version

    Returns:
        List of created DiffusionJob instances
    """
    # Imported lazily to avoid circular imports between apps.
    from cw.diffusion.models import DiffusionJob, Prompt
    from cw.tvspots.models import StoryboardImage

    video_ad_unit = storyboard.video_ad_unit
    campaign = video_ad_unit.campaign
    diffusion_model = storyboard.diffusion_model
    lora_model = storyboard.lora_model
    images_per_row = storyboard.images_per_row

    created_jobs = []

    # Index script rows by order_index for O(1) lookup per prompt.
    script_rows = {row.order_index: row for row in video_ad_unit.script_rows.all()}

    for prompt_data in prompts:
        row_index = prompt_data["row_index"]
        # Falls back to a 1-based zero-padded index when no shot number is set.
        shot_number = prompt_data.get("shot_number", f"{row_index + 1:02d}")
        script_row = script_rows.get(row_index)

        if not script_row:
            # Prompt refers to a row that no longer exists; skip rather than fail.
            logger.warning(f"Script row {row_index} not found, skipping")
            continue

        for img_idx in range(images_per_row):
            # Create identifier: {job_id}_{ad_unit_code}_row-{NN}_img-{NN}
            identifier = (
                f"{campaign.job_id}_{video_ad_unit.code}_row-{shot_number}_img-{img_idx + 1:02d}"
            )

            # Create Prompt record
            prompt_record = Prompt.objects.create(
                source_prompt=prompt_data["prompt"],
                enhanced_prompt=prompt_data["prompt"],  # Already enhanced if using LLM
                negative_prompt=prompt_data.get("negative_prompt", ""),
                enhancement_method="huggingface" if prompt_data.get("enhanced") else "none",
            )

            # Create DiffusionJob with 16:9 storyboard dimensions
            diffusion_job = DiffusionJob.objects.create(
                diffusion_model=diffusion_model,
                lora_model=lora_model,
                prompt=prompt_record,
                identifier=identifier,
                status="pending",
                width=1280,
                height=720,
            )

            # Create StoryboardImage link
            StoryboardImage.objects.create(
                storyboard=storyboard,
                script_row=script_row,
                diffusion_job=diffusion_job,
                image_index=img_idx,
            )

            created_jobs.append(diffusion_job)

            logger.info(
                f"Created DiffusionJob for storyboard: {identifier}",
                extra={
                    "storyboard_id": storyboard.pk,
                    "diffusion_job_id": diffusion_job.pk,
                    "row_index": row_index,
                    "image_index": img_idx,
                },
            )

    return created_jobs
class WireframePromptBuilder:
    """Builds image prompts for wireframe/line-drawing storyboard generation.

    Unlike :class:`StoryboardGenerator` which creates prompts from script
    text, this builder creates simple style-focused prompts designed to
    work with ControlNet structural guidance from video keyframes.
    """

    # Default positive style used when no custom style prompt is supplied.
    DEFAULT_STYLE = (
        "clean line drawing, wireframe storyboard cel, black and white, "
        "professional illustration, architectural sketch style"
    )

    # Default negative prompt steering output away from photographic looks.
    DEFAULT_NEGATIVE = (
        "photorealistic, photograph, blurry, low quality, watermark, "
        "text overlay, color photograph, 3D render"
    )
[docs] def __init__(self, style_prompt: str = ""): """ Args: style_prompt: Custom style prompt. Falls back to DEFAULT_STYLE if empty. """ self.style_prompt = style_prompt or self.DEFAULT_STYLE
[docs] def build_prompt( self, visual_text: str = "", scene_number: int | None = None, ) -> dict: """Build a wireframe generation prompt for a single keyframe. The prompt emphasises the desired visual *style* rather than content description — the content comes from the ControlNet reference image. Args: visual_text: Optional script-row description for extra context. scene_number: Optional scene number for logging. Returns: Dict with ``prompt`` and ``negative_prompt`` keys. """ parts = [self.style_prompt] if visual_text: # Add a short content hint from the script parts.append(visual_text.strip()) prompt = ", ".join(parts) logger.debug( f"Built wireframe prompt for scene {scene_number}: {prompt[:80]}..." ) return { "prompt": prompt, "negative_prompt": self.DEFAULT_NEGATIVE, }
def create_wireframe_storyboard_jobs(storyboard) -> list:
    """Create ControlNet-guided DiffusionJobs from video keyframes.

    Matches keyframes (by ``scene_number``) to script rows and creates one
    :class:`~cw.diffusion.models.DiffusionJob` per keyframe per
    ``images_per_row``. Each DiffusionJob is configured with the
    storyboard's ControlNet settings and the keyframe image as the
    reference input.

    Args:
        storyboard: :class:`~cw.tvspots.models.Storyboard` with
            ``source_type="keyframe"`` and a ``controlnet_model`` set.

    Returns:
        List of created :class:`~cw.diffusion.models.DiffusionJob` instances.

    Raises:
        ValueError: If the VideoAdUnit has no source media or keyframes.
    """
    # Imported lazily to avoid circular imports between apps.
    from django.core.files import File

    from cw.diffusion.models import DiffusionJob, Prompt
    from cw.tvspots.models import StoryboardImage

    video_ad_unit = storyboard.video_ad_unit
    campaign = video_ad_unit.campaign

    # Resolve ControlNet settings with fallback to model defaults
    controlnet = storyboard.controlnet_model
    preprocessing = (
        storyboard.preprocessing_type or controlnet.control_type
    )
    cond_scale = (
        storyboard.conditioning_scale
        if storyboard.conditioning_scale is not None
        else controlnet.default_conditioning_scale
    )
    guidance_end = (
        storyboard.control_guidance_end
        if storyboard.control_guidance_end is not None
        else controlnet.default_guidance_end
    )

    # Get keyframes via VideoAdUnit → source_media → result → key_frames
    source_media = getattr(video_ad_unit, "source_media", None)
    if not source_media or not source_media.result:
        raise ValueError(
            f"VideoAdUnit {video_ad_unit.code} has no source media with "
            "processing results. Upload and process a video first."
        )

    key_frames = list(
        source_media.result.key_frames.all().order_by("scene_number")
    )
    if not key_frames:
        raise ValueError(
            f"No keyframes found for VideoAdUnit {video_ad_unit.code}. "
            "Ensure video analysis completed successfully."
        )

    # Build scene_number → script_row mapping
    script_rows_by_scene = {}
    for row in video_ad_unit.script_rows.all():
        # shot_number often matches scene_number; fall back to order_index + 1
        try:
            scene_num = int(row.shot_number)
        except (ValueError, TypeError):
            scene_num = row.order_index + 1
        script_rows_by_scene[scene_num] = row

    prompt_builder = WireframePromptBuilder(style_prompt=storyboard.style_prompt)
    created_jobs = []

    for key_frame in key_frames:
        script_row = script_rows_by_scene.get(key_frame.scene_number)
        if not script_row:
            # Unmatched keyframe: skip rather than create orphan jobs.
            logger.warning(
                f"No script row for scene {key_frame.scene_number}, skipping keyframe"
            )
            continue

        # NOTE(review): the conditional is redundant — script_row is always
        # truthy after the `continue` above.
        visual_hint = script_row.visual_text if script_row else ""
        prompt_data = prompt_builder.build_prompt(
            visual_text=visual_hint,
            scene_number=key_frame.scene_number,
        )

        shot_number = script_row.shot_number or f"{script_row.order_index + 1:02d}"

        for img_idx in range(storyboard.images_per_row):
            # Identifier: {job_id}_{ad_unit_code}_wf-{shot}_img-{NN}
            identifier = (
                f"{campaign.job_id}_{video_ad_unit.code}"
                f"_wf-{shot_number}_img-{img_idx + 1:02d}"
            )

            prompt_record = Prompt.objects.create(
                source_prompt=prompt_data["prompt"],
                enhanced_prompt=prompt_data["prompt"],
                negative_prompt=prompt_data["negative_prompt"],
                enhancement_method="none",
            )

            # Create DiffusionJob with ControlNet settings and keyframe reference
            diffusion_job = DiffusionJob.objects.create(
                diffusion_model=storyboard.diffusion_model,
                lora_model=storyboard.lora_model,
                prompt=prompt_record,
                identifier=identifier,
                status="pending",
                width=1280,
                height=720,
                controlnet_model=controlnet,
                preprocessing_type=preprocessing,
                conditioning_scale=cond_scale,
                control_guidance_end=guidance_end,
            )

            # Copy the keyframe image to the DiffusionJob's reference_image field
            if key_frame.image:
                with open(key_frame.image.path, "rb") as f:
                    diffusion_job.reference_image.save(
                        f"ref_scene_{key_frame.scene_number:03d}.jpg",
                        File(f),
                        save=True,
                    )

            # Create StoryboardImage linking everything
            StoryboardImage.objects.create(
                storyboard=storyboard,
                script_row=script_row,
                diffusion_job=diffusion_job,
                key_frame=key_frame,
                image_index=img_idx,
            )

            created_jobs.append(diffusion_job)

            logger.info(
                f"Created wireframe DiffusionJob: {identifier}",
                extra={
                    "storyboard_id": storyboard.pk,
                    "diffusion_job_id": diffusion_job.pk,
                    "key_frame_id": key_frame.pk,
                    "scene_number": key_frame.scene_number,
                    "image_index": img_idx,
                },
            )

    logger.info(
        f"Created {len(created_jobs)} wireframe DiffusionJobs for storyboard {storyboard.pk}",
        extra={
            "storyboard_id": storyboard.pk,
            "num_jobs": len(created_jobs),
            "num_keyframes": len(key_frames),
        },
    )

    return created_jobs