From d0edc94f95b7d8b08e9f36b2ac589a1512bb93c6 Mon Sep 17 00:00:00 2001 From: Max Date: Fri, 12 Sep 2025 19:26:32 +0200 Subject: [PATCH] Improved metadata implementation --- Review note: FileInfo.__post_init__ parses ffprobe's decimal-string duration with int(float(...)) and tolerates missing format/streams keys; read_metadata tolerates empty ffprobe output; video_bitrate returns a float, or None when bit_rate is absent. Line counts per hunk are unchanged. src/file_classifier.py | 51 ++++++++----------------- src/structures.py | 86 +++++++++++++++++++++++++++++++++++++++++- 2 files changed, 99 insertions(+), 38 deletions(-) diff --git a/src/file_classifier.py b/src/file_classifier.py index a54b780..a36b846 100644 --- a/src/file_classifier.py +++ b/src/file_classifier.py @@ -4,57 +4,36 @@ from structures import PathInfo, FileInfo, PathCategory, FileCategory from pathlib import Path import os import re -import subprocess -import json import math - -def get_metadata(filepath: Path) -> dict: - cmd = [ - "ffprobe", - "-v", - "quiet", - "-print_format", - "json", - "-show_format", - "-show_streams", - filepath, - ] - result = subprocess.run( - cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True - ) - return json.loads(result.stdout) - - def classify_show(info: PathInfo) -> PathInfo: - # Prepare meta information - episode_durations: list[int] = [] - movie_information = None - if info.category == PathCategory.SHOW: - for ep in imdb.get_all_episodes(info.imdb.imdb_id): - if ep.duration: - episode_durations.append(int(ep.duration / 60)) + # Gather meta information for identifying episodes + episode_durations: set[int] = set() + for ep in imdb.get_all_episodes(info.imdb.imdb_id): + if ep.duration: + episode_durations.add(int(ep.duration / 60)) + + logging.debug(episode_durations) - print(set(episode_durations)) # Go over all files count = 0 for root, dirs, files in os.walk(info.path): for filename in files: filepath = Path(os.path.join(root, filename)) - if filepath.suffix != ".mkv": + file = FileInfo(filepath, info) + + if file.path.suffix != ".mkv" or file.duration_in_seconds is None: continue - m = get_metadata(filepath) - duration = int(float(m["format"]["duration"]) / 60) if ( - math.floor(duration) in episode_durations - or
math.ceil(duration) in episode_durations + math.floor(file.duration_in_seconds / 60) in episode_durations + or math.ceil(file.duration_in_seconds / 60) in episode_durations ): - print(f"{filename} {duration} EPISODE") + print(f"{filename} {file.duration_in_seconds / 60} EPISODE") count += 1 else: - print(f"{filename} {duration}") + print(f"{filename} {file.duration_in_seconds / 60}") - print(count) + logging.info(f"Identified [{count}] episodes.") return info diff --git a/src/structures.py b/src/structures.py index 9a62c34..5ffa388 100644 --- a/src/structures.py +++ b/src/structures.py @@ -3,6 +3,37 @@ from enum import Enum from pathlib import Path from imdbinfo.models import MovieBriefInfo from os import path +import subprocess +import json + + +@dataclass +class Resolution: + width: int + """Horizontal resolution.""" + + height: int + """Vertical resolution.""" + + @property + def w(self) -> int: + """Horizontal resolution.""" + return self.width + + @property + def h(self) -> int: + """Vertical resolution.""" + return self.height + + @property + def x(self) -> int: + """Horizontal resolution.""" + return self.width + + @property + def y(self) -> int: + """Vertical resolution.""" + return self.height class FileCategory(Enum): @@ -47,7 +78,7 @@ class PathCategory(Enum): @dataclass class FileInfo: - original_path: Path + path: Path """Original Path to file, before any processing.""" parent_path: "PathInfo" @@ -65,16 +96,67 @@ class FileInfo: episode_no: int | None = None """Episode number in case if category is EPISODE.""" + duration_in_seconds: int | None = None + """Duration of potential video file in seconds.""" + + video_stream: dict | None = None + """Meta information about the first video stream found.""" + + @property + def video_bitrate(self) -> float | None: + """Bitrate of video in bps (bits per second).""" + stream = self.video_stream or {} + rate = stream.get("bit_rate") + return None if rate is None else float(rate) + + @property + def resolution(self) -> Resolution | None: +
"""Resolution of a possible video stream.""" + if self.video_stream is None: + return None + return Resolution( + width=int(self.video_stream["width"]), + height=int(self.video_stream["height"]), + ) + + def __post_init__(self): + """Read basic metadata often referenced for processing.""" + m = self.read_metadata() + + if m.get("format", {}).get("duration"): + self.duration_in_seconds = int(float(m["format"]["duration"])) + + self.video_stream = next( + (s for s in m.get("streams", []) if s.get("codec_type") == "video"), None + ) + @property def new_path(self) -> Path: """New Path.""" return Path( path.join( self.parent_path.path, - f"{self.new_file_name}{self.original_path.suffix}", + f"{self.new_file_name}{self.path.suffix}", ) ) + def read_metadata(self) -> dict: + """Reads metadata using ffprobe.""" + cmd = [ + "ffprobe", + "-v", + "quiet", + "-print_format", + "json", + "-show_format", + "-show_streams", + self.path, + ] + result = subprocess.run( + cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True + ) + return json.loads(result.stdout or "{}") + @dataclass class PathInfo: