Source code for cw.lib.video_analysis.metadata
"""
Video metadata extraction using PyAV (FFmpeg).
Extracts technical metadata from video files including duration,
resolution, frame rate, and audio properties.
"""
import os
from typing import Dict
import av
[docs]
def extract_video_metadata(video_path: str) -> Dict:
    """
    Extract technical metadata from a video file using PyAV.

    Only the first video stream and the first audio stream are inspected.
    Any value that cannot be determined is left as ``None``.

    Args:
        video_path: Path to the video file

    Returns:
        Dictionary containing video metadata:
        {
            'duration': float (seconds),
            'width': int,
            'height': int,
            'frame_rate': float,
            'audio_channels': int,
            'sample_rate': int,
            'file_size': int (bytes)
        }

    Raises:
        FileNotFoundError: If video file doesn't exist
        av.AVError: If file cannot be opened or is not a valid video.
            The original PyAV error is chained as ``__cause__``.
    """
    if not os.path.exists(video_path):
        raise FileNotFoundError(f"Video file not found: {video_path}")

    metadata = {
        "duration": None,
        "width": None,
        "height": None,
        "frame_rate": None,
        "audio_channels": None,
        "sample_rate": None,
        "file_size": None,
    }

    # File size comes from the filesystem, not from PyAV, so it stays
    # outside the try block that translates PyAV errors.
    metadata["file_size"] = os.path.getsize(video_path)

    try:
        with av.open(video_path) as container:
            # Use the first video stream, if the file has one.
            video_stream = next(iter(container.streams.video), None)
            if video_stream is not None:
                metadata["width"] = video_stream.width
                metadata["height"] = video_stream.height

                if video_stream.average_rate:
                    metadata["frame_rate"] = float(video_stream.average_rate)

                # Prefer the stream's own duration (expressed in units of
                # its time_base); fall back to the container duration when
                # the stream does not report a usable one.
                stream_duration = video_stream.duration
                time_base = video_stream.time_base
                if stream_duration and time_base is not None:
                    metadata["duration"] = float(stream_duration * time_base)
                elif container.duration:
                    # Container duration is in microseconds
                    metadata["duration"] = float(container.duration) / 1000000.0

            # Use the first audio stream, if the file has one.
            audio_stream = next(iter(container.streams.audio), None)
            if audio_stream is not None:
                metadata["audio_channels"] = audio_stream.channels
                metadata["sample_rate"] = audio_stream.rate
    except av.AVError as e:
        # PyAV error classes are constructed as (code, message, ...).
        # Building one from a single string would raise TypeError here and
        # hide the real failure, so pass the original error code along with
        # a contextualised message and chain the original exception.
        raise av.AVError(
            e.errno,
            f"Failed to extract metadata from {video_path}: {e.strerror}",
        ) from e

    return metadata