Improved metadata implementation

2025-09-12 19:26:32 +02:00 · 2025-09-12 19:26:32 +02:00 · d0edc94f95
commit d0edc94f95
parent 979cb981b9
2 changed files with 99 additions and 38 deletions
--- a/src/file_classifier.py
+++ b/src/file_classifier.py
@@ -4,57 +4,36 @@ from structures import PathInfo, FileInfo, PathCategory, FileCategory
 from pathlib import Path
 import os
 import re
-import subprocess
-import json
 import math

-
-def get_metadata(filepath: Path) -> dict:
-    cmd = [
-        "ffprobe",
-        "-v",
-        "quiet",
-        "-print_format",
-        "json",
-        "-show_format",
-        "-show_streams",
-        filepath,
-    ]
-    result = subprocess.run(
-        cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True
-    )
-    return json.loads(result.stdout)
-
-
 def classify_show(info: PathInfo) -> PathInfo:
-    # Prepare meta information
-    episode_durations: list[int] = []
-    movie_information = None
-    if info.category == PathCategory.SHOW:
+    # Gather meta information for identifying episodes
+    episode_durations: set[int] = set()
    for ep in imdb.get_all_episodes(info.imdb.imdb_id):
        if ep.duration:
-                episode_durations.append(int(ep.duration / 60))
+            episode_durations.add(int(ep.duration / 60))
+
+    logging.debug(episode_durations)

-    print(set(episode_durations))
    # Go over all files
    count = 0
    for root, dirs, files in os.walk(info.path):
        for filename in files:
            filepath = Path(os.path.join(root, filename))
-            if filepath.suffix != ".mkv":
+            file = FileInfo(filepath, info)
+
+            if file.path.suffix != ".mkv" or file.duration_in_seconds is None:
                continue
-            m = get_metadata(filepath)
-            duration = int(float(m["format"]["duration"]) / 60)
            if (
-                math.floor(duration) in episode_durations
-                or math.ceil(duration) in episode_durations
+                math.floor(file.duration_in_seconds / 60) in episode_durations
+                or math.ceil(file.duration_in_seconds / 60) in episode_durations
            ):
-                print(f"{filename} {duration} EPISODE")
+                print(f"{filename} {file.duration_in_seconds / 60} EPISODE")
                count += 1
            else:
-                print(f"{filename} {duration}")
+                print(f"{filename} {file.duration_in_seconds / 60}")

-    print(count)
+    logging.info(f"Identified [{count}] episodes.")
    return info


--- a/src/structures.py
+++ b/src/structures.py
@@ -3,6 +3,37 @@ from enum import Enum
 from pathlib import Path
 from imdbinfo.models import MovieBriefInfo
 from os import path
+import subprocess
+import json
+
+
+@dataclass
+class Resolution:
+    width: int
+    """Horizontal resolution."""
+
+    height: int
+    """Vertical resolution."""
+
+    @property
+    def w(self) -> int:
+        """Horizontal resolution."""
+        return self.width
+
+    @property
+    def h(self) -> int:
+        """Vertical resolution."""
+        return self.height
+
+    @property
+    def x(self) -> int:
+        """Horizontal resolution."""
+        return self.width
+
+    @property
+    def y(self) -> int:
+        """Vertical resolution."""
+        return self.height


 class FileCategory(Enum):
@@ -47,7 +78,7 @@ class PathCategory(Enum):

@dataclass
 class FileInfo:
-    original_path: Path
+    path: Path
    """Original Path to file, before any processing."""

    parent_path: "PathInfo"
@@ -65,16 +96,67 @@ class FileInfo:
    episode_no: int | None = None
    """Episode number in case if category is EPISODE."""

+    duration_in_seconds: int | None = None
+    """Duration of potential video file in seconds."""
+
+    video_stream: dict | None = None
+    """Meta information about the first video stream found."""
+
+    @property
+    def video_bitrate(self) -> float | None:
+        """Bitrate of video in bps (bits per second)."""
+        if self.video_stream is None:
+            return None
+        return self.video_stream["bit_rate"]
+
+    @property
+    def resolution(self) -> Resolution | None:
+        """Resolution of a possible video stream."""
+        if self.video_stream is None:
+            return None
+        return Resolution(
+            width=int(self.video_stream["width"]),
+            height=int(self.video_stream["height"]),
+        )
+
+    def __post_init__(self):
+        """Read basic metadata often referenced for processing."""
+        m = self.read_metadata()
+
+        if m["format"]["duration"]:
+            self.duration_in_seconds = int(m["format"]["duration"])
+
+        self.video_stream = next(
+            (s for s in m["streams"] if s["codec_type"] == "video"), None
+        )
+
    @property
    def new_path(self) -> Path:
        """New Path."""
        return Path(
            path.join(
                self.parent_path.path,
-                f"{self.new_file_name}{self.original_path.suffix}",
+                f"{self.new_file_name}{self.path.suffix}",
            )
        )

+    def read_metadata(self) -> dict:
+        """Reads metadata using ffprobe."""
+        cmd = [
+            "ffprobe",
+            "-v",
+            "quiet",
+            "-print_format",
+            "json",
+            "-show_format",
+            "-show_streams",
+            self.path,
+        ]
+        result = subprocess.run(
+            cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True
+        )
+        return json.loads(result.stdout)
+

@dataclass
 class PathInfo: