# Source code for cw.lib.video_analysis.scene_detection

"""
Scene detection using PySceneDetect.

Identifies distinct scenes in video based on visual content changes.
"""

import os
from typing import Dict, List

from scenedetect import SceneManager, open_video
from scenedetect.detectors import ContentDetector



# [docs] (Sphinx "view source" link text left over from the HTML page)
def detect_scenes(
    video_path: str, threshold: float = 27.0, min_scene_len: int = 15
) -> List[Dict]:
    """
    Detect scene boundaries using content-based detection.

    Uses PySceneDetect's ContentDetector which analyzes changes in content
    between frames to identify scene boundaries.

    Args:
        video_path: Path to video file
        threshold: Sensitivity threshold for scene detection (default: 27.0)
                  Lower values = more scenes detected
                  Typical range: 15-40
        min_scene_len: Minimum scene length in frames (default: 15)

    Returns:
        List of scene dictionaries, numbered from 1 in the order reported
        by the scene manager (empty if it reports no scenes):
        [
            {
                "scene_number": 1,
                "start_time": 0.0,
                "end_time": 3.5,
                "duration": 3.5,
                "start_frame": 0,
                "end_frame": 105
            },
            ...
        ]

    Raises:
        FileNotFoundError: If video_path is a local path that doesn't exist
        Exception: Any error raised by PySceneDetect while opening or
            analyzing the video is propagated unchanged
    """
    # Fail fast with the documented exception type. Relying on open_video()
    # alone does not guarantee a FileNotFoundError for a missing file
    # (NOTE(review): the OpenCV backend appears to raise a plain OSError).
    # Inputs containing "://" (URLs) or "%" (image-sequence patterns) are not
    # plain local files, so they are left for open_video() to validate.
    if isinstance(video_path, str):
        is_local_file = "://" not in video_path and "%" not in video_path
        if is_local_file and not os.path.exists(video_path):
            raise FileNotFoundError(f"Video file not found: {video_path}")

    video = open_video(video_path)

    # Run content-based detection over the whole video.
    scene_manager = SceneManager()
    scene_manager.add_detector(
        ContentDetector(threshold=threshold, min_scene_len=min_scene_len)
    )
    scene_manager.detect_scenes(video)

    # Each scene is a (start, end) pair of FrameTimecode objects; flatten
    # them into plain dictionaries of seconds and frame numbers.
    return [
        {
            "scene_number": idx,
            "start_time": start_time.get_seconds(),
            "end_time": end_time.get_seconds(),
            "duration": (end_time - start_time).get_seconds(),
            "start_frame": start_time.get_frames(),
            "end_frame": end_time.get_frames(),
        }
        for idx, (start_time, end_time) in enumerate(
            scene_manager.get_scene_list(), 1
        )
    ]