# Source code for cw.lib.video_analysis.scene_detection

"""
Scene detection using PySceneDetect.

Identifies distinct scenes in video based on visual content changes.
"""

import os
from typing import Dict, List

from scenedetect import SceneManager, open_video
from scenedetect.detectors import ContentDetector


def detect_scenes(
    video_path: str, threshold: float = 27.0, min_scene_len: int = 15
) -> List[Dict]:
    """
    Detect scene boundaries using content-based detection.

    Uses PySceneDetect's ContentDetector, which analyzes changes in content
    between frames to identify scene boundaries.

    Args:
        video_path: Path to video file
        threshold: Sensitivity threshold for scene detection (default: 27.0)
            Lower values = more scenes detected
            Typical range: 15-40
        min_scene_len: Minimum scene length in frames (default: 15)

    Returns:
        List of scene dictionaries, numbered from 1 in detection order:
        [
            {
                "scene_number": 1,
                "start_time": 0.0,
                "end_time": 3.5,
                "duration": 3.5,
                "start_frame": 0,
                "end_frame": 105
            },
            ...
        ]
        NOTE(review): the list is whatever SceneManager.get_scene_list()
        returns with its default arguments; it may be empty when no cuts
        are detected -- confirm against the installed PySceneDetect version.

    Raises:
        FileNotFoundError: If video_path is an ordinary filesystem path that
            does not exist
        Exception: Errors raised by PySceneDetect while opening or processing
            the video are propagated unchanged
    """
    # Fail early with the documented exception instead of relying on
    # whatever the video backend raises for a missing file. Strings that
    # look like URLs or printf-style image-sequence patterns are not plain
    # filesystem paths, so they are passed through to open_video untouched.
    path_text = os.fspath(video_path)
    is_plain_path = "://" not in path_text and "%" not in path_text
    if is_plain_path and not os.path.exists(path_text):
        raise FileNotFoundError(f"Video file not found: {path_text}")

    # Open video with scenedetect
    video = open_video(video_path)

    # Create scene manager with content detector
    scene_manager = SceneManager()
    scene_manager.add_detector(
        ContentDetector(threshold=threshold, min_scene_len=min_scene_len)
    )

    # Detect scenes
    scene_manager.detect_scenes(video)

    # Convert each (start, end) timecode pair to our dictionary format.
    # The timecode objects expose get_seconds() and get_frames().
    return [
        {
            "scene_number": scene_number,
            "start_time": start_time.get_seconds(),
            "end_time": end_time.get_seconds(),
            "duration": (end_time - start_time).get_seconds(),
            "start_frame": start_time.get_frames(),
            "end_frame": end_time.get_frames(),
        }
        for scene_number, (start_time, end_time) in enumerate(
            scene_manager.get_scene_list(), 1
        )
    ]