"""
Storyboard generation from TV spot scripts.
This module generates image prompts from TV spot script visual descriptions
and creates DiffusionJobs for storyboard frame generation.
Key Features:
- Extracts visual elements from script descriptions
- Optional LLM enhancement for more detailed prompts
- Creates linked DiffusionJob records for each frame
- Supports visual style prefixes for consistency
Classes:
:class:`StoryboardGenerator`
Generates image prompts from script rows with optional LLM enhancement
Functions:
:func:`create_storyboard_jobs`
Creates DiffusionJob and StoryboardImage records from prompts
Workflow:
1. Extract visual elements from script row ``visual_text``
2. Add visual style prefix and cinematic quality keywords
3. Optionally enhance with LLM (HFPromptEnhancer)
4. Create DiffusionJob records linked to StoryboardJob
Usage::
from cw.lib.storyboard import StoryboardGenerator, create_storyboard_jobs
# Generate prompts
generator = StoryboardGenerator(use_llm=True)
prompts = generator.generate_prompts_for_version(video_ad_unit)
# Create DiffusionJobs
jobs = create_storyboard_jobs(storyboard, prompts)
Note:
Generated storyboard frames use 1280x720 (16:9) dimensions by default.
"""
import logging
import re
from typing import Optional
logger = logging.getLogger(__name__)
class StoryboardGenerator:
    """
    Generates storyboard image prompts from TV spot script rows.

    Can use either simple prompt building (combines visual description with
    style prefix) or LLM-enhanced prompts for more detailed generation.
    """

    # Shot-type abbreviations normalized to "XX: " form during extraction.
    _SHOT_TYPES = ("CU", "MCU", "MS", "WS", "ECU", "EWS", "MWS", "POV", "OTS")

    def __init__(
        self,
        use_llm: bool = True,
        model_id: str = "Qwen/Qwen2.5-3B-Instruct",
        device: Optional[str] = None,
    ):
        """
        Initialize the storyboard generator.

        Args:
            use_llm: Whether to use LLM for enhanced prompt generation
            model_id: HuggingFace model ID for prompt enhancement
            device: Device to use ('cpu', 'mps', 'cuda', or None for auto-detect)
        """
        self.use_llm = use_llm
        self.model_id = model_id
        self.device = device
        # Lazily-created HFPromptEnhancer; constructing it loads model
        # weights, so defer until the first enhanced prompt is requested.
        self._enhancer = None

    def _get_enhancer(self):
        """Get or create the prompt enhancer (None when LLM use is disabled)."""
        if self._enhancer is None and self.use_llm:
            logger.debug(f"Creating HFPromptEnhancer with model: {self.model_id}")
            # Imported lazily so environments without the enhancer's heavy
            # dependencies can still use simple prompt building.
            from cw.lib.prompt_enhancer import HFPromptEnhancer

            self._enhancer = HFPromptEnhancer(
                model_id=self.model_id,
                style="cinematic",
                creativity=0.7,
            )
            logger.debug("HFPromptEnhancer created")
        return self._enhancer

    def generate_prompt(
        self,
        visual_text: str,
        audio_text: str,
        visual_style_prompt: str = "",
        enhance: bool = True,
    ) -> dict:
        """
        Generate an image prompt from a script row.

        Args:
            visual_text: Visual description from script (shots, settings, actions)
            audio_text: Audio description (dialogue, VO) for context
            visual_style_prompt: Common style prefix for consistency
            enhance: Whether to use LLM enhancement

        Returns:
            Dict with 'prompt', 'negative_prompt', 'source_visual',
            'source_audio' and 'enhanced' keys.
        """
        logger.debug(f"Generating prompt from visual_text: {visual_text[:80]}...")

        # Build base prompt from the visual description, stripping timing
        # and technical notes.
        base_prompt = self._extract_visual_elements(visual_text)
        logger.debug(f"Extracted visual elements: {base_prompt[:80]}...")

        # Add style prefix if provided
        if visual_style_prompt:
            full_prompt = f"{visual_style_prompt}, {base_prompt}"
            logger.debug(f"Added style prefix: {visual_style_prompt[:50]}...")
        else:
            full_prompt = base_prompt

        # Add cinematic quality keywords
        quality_suffix = "cinematic still, professional photography, high quality, detailed"
        full_prompt = f"{full_prompt}, {quality_suffix}"

        result = {
            "prompt": full_prompt,
            "negative_prompt": "blurry, low quality, amateur, distorted, watermark, text overlay",
            "source_visual": visual_text,
            "source_audio": audio_text,
            # Default to False up front: previously this key was missing
            # entirely when the enhancer could not be created, which could
            # KeyError on callers reading result["enhanced"].
            "enhanced": False,
        }

        # Optionally enhance with LLM
        if enhance and self.use_llm:
            logger.debug("Attempting LLM enhancement")
            try:
                enhancer = self._get_enhancer()
                if enhancer:
                    enhanced = enhancer.enhance_prompt(full_prompt)
                    result["prompt"] = enhanced.get("enhanced_prompt", full_prompt)
                    result["negative_prompt"] = enhanced.get(
                        "negative_prompt", result["negative_prompt"]
                    )
                    result["enhanced"] = True
                    logger.debug(
                        f"LLM enhancement successful, prompt length: {len(result['prompt'])}"
                    )
            except Exception as e:
                # Best effort: fall back to the unenhanced prompt.
                logger.warning(f"LLM enhancement failed, using base prompt: {e}")
        else:
            logger.debug("Skipping LLM enhancement (disabled)")

        return result

    def _extract_visual_elements(self, visual_text: str) -> str:
        """
        Extract key visual elements from script visual description.

        Removes timing notations, normalizes shot-type abbreviations, and
        collapses whitespace, keeping the describable visual content.
        """
        text = visual_text
        # Remove timing references like "00:00:05:00" or "(5.0s)"
        text = re.sub(r"\d{2}:\d{2}:\d{2}:\d{2}", "", text)
        text = re.sub(r"\(\d+\.?\d*s\)", "", text)
        # Normalize shot type prefixes to "XX: " — they add useful context
        # for image generation, so keep them but standardize the form.
        for shot in self._SHOT_TYPES:
            text = re.sub(rf"\b{shot}\b[:\s]*", f"{shot}: ", text, flags=re.IGNORECASE)
        # Collapse runs of whitespace left behind by the removals
        text = " ".join(text.split())
        return text.strip()

    def generate_prompts_for_version(
        self,
        video_ad_unit,
        enhance: bool = True,
    ) -> list[dict]:
        """
        Generate prompts for all script rows in a video ad unit.

        Args:
            video_ad_unit: VideoAdUnit instance
            enhance: Whether to use LLM enhancement

        Returns:
            List of prompt dicts, one per script row (ordered by
            ``order_index``), each annotated with 'row_index' and
            'shot_number' for downstream job creation.
        """
        logger.debug(f"Generating prompts for video ad unit: {video_ad_unit.code}")
        prompts = []
        visual_style = video_ad_unit.visual_style_prompt or ""
        logger.debug(f"Visual style prompt: {visual_style[:50] if visual_style else '(none)'}...")
        script_rows = list(video_ad_unit.script_rows.all().order_by("order_index"))
        logger.debug(f"Processing {len(script_rows)} script rows")
        for idx, row in enumerate(script_rows):
            logger.debug(
                f"Generating prompt for row {idx + 1}/{len(script_rows)}: shot {row.shot_number}"
            )
            prompt_data = self.generate_prompt(
                visual_text=row.visual_text,
                audio_text=row.audio_text,
                visual_style_prompt=visual_style,
                enhance=enhance,
            )
            # Carry row identity so jobs can be linked back to script rows.
            prompt_data["row_index"] = row.order_index
            prompt_data["shot_number"] = row.shot_number
            prompts.append(prompt_data)
        logger.info(f"Generated {len(prompts)} prompts for video ad unit {video_ad_unit.code}")
        return prompts
def create_storyboard_jobs(
    storyboard,
    prompts: list[dict],
) -> list:
    """
    Create DiffusionJobs and StoryboardImages for a storyboard.

    Each prompt dict is matched to its script row by ``row_index``; for every
    matched row, ``storyboard.images_per_row`` DiffusionJobs are created in
    "pending" status at 1280x720 and linked via StoryboardImage records.

    Args:
        storyboard: Storyboard instance
        prompts: List of prompt dicts from generate_prompts_for_version

    Returns:
        List of created DiffusionJob instances
    """
    from cw.diffusion.models import DiffusionJob, Prompt
    from cw.tvspots.models import StoryboardImage

    ad_unit = storyboard.video_ad_unit
    campaign = ad_unit.campaign

    # Index script rows once so prompts can be matched by order_index.
    rows_by_index = {row.order_index: row for row in ad_unit.script_rows.all()}

    jobs = []
    for entry in prompts:
        row_index = entry["row_index"]
        shot_number = entry.get("shot_number", f"{row_index + 1:02d}")
        script_row = rows_by_index.get(row_index)
        if not script_row:
            logger.warning(f"Script row {row_index} not found, skipping")
            continue

        for img_idx in range(storyboard.images_per_row):
            # Identifier scheme: {job_id}_{ad_unit_code}_row-{NN}_img-{NN}
            identifier = (
                f"{campaign.job_id}_{ad_unit.code}_row-{shot_number}_img-{img_idx + 1:02d}"
            )

            # Persist the prompt; it is already enhanced if LLM was used.
            prompt_record = Prompt.objects.create(
                source_prompt=entry["prompt"],
                enhanced_prompt=entry["prompt"],
                negative_prompt=entry.get("negative_prompt", ""),
                enhancement_method="huggingface" if entry.get("enhanced") else "none",
            )

            # 16:9 storyboard frame dimensions.
            job = DiffusionJob.objects.create(
                diffusion_model=storyboard.diffusion_model,
                lora_model=storyboard.lora_model,
                prompt=prompt_record,
                identifier=identifier,
                status="pending",
                width=1280,
                height=720,
            )

            # Link the job back to the storyboard and script row.
            StoryboardImage.objects.create(
                storyboard=storyboard,
                script_row=script_row,
                diffusion_job=job,
                image_index=img_idx,
            )
            jobs.append(job)
            logger.info(
                f"Created DiffusionJob for storyboard: {identifier}",
                extra={
                    "storyboard_id": storyboard.pk,
                    "diffusion_job_id": job.pk,
                    "row_index": row_index,
                    "image_index": img_idx,
                },
            )
    return jobs
class WireframePromptBuilder:
"""Builds image prompts for wireframe/line-drawing storyboard generation.
Unlike :class:`StoryboardGenerator` which creates prompts from script text,
this builder creates simple style-focused prompts designed to work with
ControlNet structural guidance from video keyframes.
"""
DEFAULT_STYLE = (
"clean line drawing, wireframe storyboard cel, black and white, "
"professional illustration, architectural sketch style"
)
DEFAULT_NEGATIVE = (
"photorealistic, photograph, blurry, low quality, watermark, "
"text overlay, color photograph, 3D render"
)
[docs]
def __init__(self, style_prompt: str = ""):
"""
Args:
style_prompt: Custom style prompt. Falls back to DEFAULT_STYLE if empty.
"""
self.style_prompt = style_prompt or self.DEFAULT_STYLE
[docs]
def build_prompt(
self,
visual_text: str = "",
scene_number: int | None = None,
) -> dict:
"""Build a wireframe generation prompt for a single keyframe.
The prompt emphasises the desired visual *style* rather than content
description — the content comes from the ControlNet reference image.
Args:
visual_text: Optional script-row description for extra context.
scene_number: Optional scene number for logging.
Returns:
Dict with ``prompt`` and ``negative_prompt`` keys.
"""
parts = [self.style_prompt]
if visual_text:
# Add a short content hint from the script
parts.append(visual_text.strip())
prompt = ", ".join(parts)
logger.debug(
f"Built wireframe prompt for scene {scene_number}: {prompt[:80]}..."
)
return {
"prompt": prompt,
"negative_prompt": self.DEFAULT_NEGATIVE,
}
def create_wireframe_storyboard_jobs(storyboard) -> list:
    """Create ControlNet-guided DiffusionJobs from video keyframes.

    Matches keyframes (by ``scene_number``) to script rows and creates one
    :class:`~cw.diffusion.models.DiffusionJob` per keyframe per ``images_per_row``.
    Each DiffusionJob is configured with the storyboard's ControlNet settings
    and the keyframe image as the reference input.

    Args:
        storyboard: :class:`~cw.tvspots.models.Storyboard` with
            ``source_type="keyframe"`` and a ``controlnet_model`` set.

    Returns:
        List of created :class:`~cw.diffusion.models.DiffusionJob` instances.

    Raises:
        ValueError: If the storyboard has no ControlNet model configured, or
            the VideoAdUnit has no source media or keyframes.
    """
    from django.core.files import File
    from cw.diffusion.models import DiffusionJob, Prompt
    from cw.tvspots.models import StoryboardImage

    video_ad_unit = storyboard.video_ad_unit
    campaign = video_ad_unit.campaign

    # Fail fast with the documented ValueError instead of a cryptic
    # AttributeError when accessing controlnet.control_type below.
    controlnet = storyboard.controlnet_model
    if controlnet is None:
        raise ValueError(
            f"Storyboard {storyboard.pk} has no controlnet_model configured. "
            "Wireframe generation requires a ControlNet model."
        )

    # Resolve ControlNet settings with fallback to model defaults
    preprocessing = (
        storyboard.preprocessing_type or controlnet.control_type
    )
    cond_scale = (
        storyboard.conditioning_scale
        if storyboard.conditioning_scale is not None
        else controlnet.default_conditioning_scale
    )
    guidance_end = (
        storyboard.control_guidance_end
        if storyboard.control_guidance_end is not None
        else controlnet.default_guidance_end
    )

    # Get keyframes via VideoAdUnit → source_media → result → key_frames
    source_media = getattr(video_ad_unit, "source_media", None)
    if not source_media or not source_media.result:
        raise ValueError(
            f"VideoAdUnit {video_ad_unit.code} has no source media with "
            "processing results. Upload and process a video first."
        )
    key_frames = list(
        source_media.result.key_frames.all().order_by("scene_number")
    )
    if not key_frames:
        raise ValueError(
            f"No keyframes found for VideoAdUnit {video_ad_unit.code}. "
            "Ensure video analysis completed successfully."
        )

    # Build scene_number → script_row mapping
    script_rows_by_scene = {}
    for row in video_ad_unit.script_rows.all():
        # shot_number often matches scene_number; fall back to order_index + 1
        try:
            scene_num = int(row.shot_number)
        except (ValueError, TypeError):
            scene_num = row.order_index + 1
        script_rows_by_scene[scene_num] = row

    prompt_builder = WireframePromptBuilder(style_prompt=storyboard.style_prompt)
    created_jobs = []
    for key_frame in key_frames:
        script_row = script_rows_by_scene.get(key_frame.scene_number)
        if not script_row:
            logger.warning(
                f"No script row for scene {key_frame.scene_number}, skipping keyframe"
            )
            continue

        # script_row is guaranteed non-None past the guard above.
        prompt_data = prompt_builder.build_prompt(
            visual_text=script_row.visual_text,
            scene_number=key_frame.scene_number,
        )
        shot_number = script_row.shot_number or f"{script_row.order_index + 1:02d}"

        for img_idx in range(storyboard.images_per_row):
            # Identifier scheme: {job_id}_{ad_unit_code}_wf-{NN}_img-{NN}
            identifier = (
                f"{campaign.job_id}_{video_ad_unit.code}"
                f"_wf-{shot_number}_img-{img_idx + 1:02d}"
            )
            prompt_record = Prompt.objects.create(
                source_prompt=prompt_data["prompt"],
                enhanced_prompt=prompt_data["prompt"],
                negative_prompt=prompt_data["negative_prompt"],
                enhancement_method="none",
            )
            # Create DiffusionJob with ControlNet settings and keyframe reference
            diffusion_job = DiffusionJob.objects.create(
                diffusion_model=storyboard.diffusion_model,
                lora_model=storyboard.lora_model,
                prompt=prompt_record,
                identifier=identifier,
                status="pending",
                width=1280,
                height=720,
                controlnet_model=controlnet,
                preprocessing_type=preprocessing,
                conditioning_scale=cond_scale,
                control_guidance_end=guidance_end,
            )
            # Copy the keyframe image to the DiffusionJob's reference_image field
            if key_frame.image:
                with open(key_frame.image.path, "rb") as f:
                    diffusion_job.reference_image.save(
                        f"ref_scene_{key_frame.scene_number:03d}.jpg",
                        File(f),
                        save=True,
                    )
            # Create StoryboardImage linking everything
            StoryboardImage.objects.create(
                storyboard=storyboard,
                script_row=script_row,
                diffusion_job=diffusion_job,
                key_frame=key_frame,
                image_index=img_idx,
            )
            created_jobs.append(diffusion_job)
            logger.info(
                f"Created wireframe DiffusionJob: {identifier}",
                extra={
                    "storyboard_id": storyboard.pk,
                    "diffusion_job_id": diffusion_job.pk,
                    "key_frame_id": key_frame.pk,
                    "scene_number": key_frame.scene_number,
                    "image_index": img_idx,
                },
            )

    logger.info(
        f"Created {len(created_jobs)} wireframe DiffusionJobs for storyboard {storyboard.pk}",
        extra={
            "storyboard_id": storyboard.pk,
            "num_jobs": len(created_jobs),
            "num_keyframes": len(key_frames),
        },
    )
    return created_jobs