Some refactoring and implemented TMDB reference image download and main file

2025-09-17 22:56:44 +02:00 · 2025-09-17 22:56:44 +02:00 · fd9652bdec
commit fd9652bdec
parent fc546b2741
7 changed files with 260 additions and 3 deletions
--- a/pyproject.toml
+++ b/pyproject.toml
@ -9,5 +9,6 @@ dependencies = [
    "matplotlib>=3.10.6",
    "numpy>=2.3.2",
    "opencv-python>=4.11.0.86",
+    "requests>=2.32.5",
    "scikit-image>=0.25.2",
 ]
--- a/src/file_classifier.py
+++ b/src/file_classifier.py
@ -1,11 +1,14 @@
 import logging
 import imdbinfo as imdb
-from structures import PathInfo, FileInfo, PathCategory, FileCategory
+from models import PathInfo, FileInfo, PathCategory, FileCategory
 from pathlib import Path
 import os
 import re
 import math

+import tmdb
+
+
 def classify_show(info: PathInfo) -> PathInfo:
    # Gather meta information for identifying episodes
    episode_durations: set[int] = set()
@ -77,8 +80,15 @@ def classify_files(path: str) -> PathInfo | None:
        return None
    logging.info(f"Found matching IMDB entry with id [{imdb_entry.imdb_id}].")

+    tmdb_id = tmdb.search_show(imdb_entry.id, imdb_entry.title, year)
+
    info = PathInfo(
-        path=p, title=title, year=year, imdb_id=imdb_entry.imdb_id, imdb=imdb_entry
+        path=p,
+        title=imdb_entry.title,
+        year=year,
+        imdb_id=imdb_entry.imdb_id,
+        imdb=imdb_entry,
+        tmdb_id=tmdb_id,
    )

    # Identify category
--- a/src/main.py
+++ b/src/main.py
@ -0,0 +1,66 @@
+from pathlib import Path
+
+from file_classifier import classify_files
+from match_episodes import match_episodes_to_references
+import argparse
+import logging
+
+from models import PathCategory
+import tmdb
+
+
+def main(args: argparse.Namespace):
+    """Classify the input directory and match its episodes to TMDB reference images.
+
+    Each precondition is checked in turn; when one fails an error is logged
+    and the function returns without doing further work.
+
+    Args:
+        args: Parsed command line arguments; only ``args.input`` is read.
+    """
+    info = classify_files(args.input)
+
+    # Guard clauses: bail out as soon as a precondition is not met.
+    if info is None:
+        logging.error("Could not classify files.")
+        return
+
+    if info.category != PathCategory.SHOW:
+        logging.error(
+            f"Directory not recognized as SHOW, but as [{info.category}] instead. Only SHOW supported at the moment."
+        )
+        return
+
+    # ==== Process SHOW ====
+    if info.episodes is None:
+        logging.error(
+            "Episodes could not be identified, no reference matching possible."
+        )
+        return
+
+    if info.tmdb_id is None:
+        logging.error("TMDB entry not identified, cannot find reference images.")
+        return
+
+    # Match episodes to references
+    reference_images = tmdb.download_episode_images(info.tmdb_id)
+    episode_paths = []
+    for episode_file in info.episodes:
+        episode_paths.append(str(episode_file.path.absolute()))
+    matches = match_episodes_to_references(episode_paths, reference_images.flatten())
+
+    # Set new episode names
+    # TODO: Resolve matching results
+
+    # Rename files
+    # TODO: Rename files
+
+    logging.info(f"Finished processing [{info.path}].")
+
+
+def args_parser() -> argparse.Namespace:
+    """Build the command line interface and parse the process arguments.
+
+    Returns:
+        The parsed namespace; ``input`` holds the positional directory
+        argument converted to a ``Path``.
+    """
+    cli = argparse.ArgumentParser(
+        description="Automatic renamer for ripped DVD and Blu-Ray files."
+    )
+    # Single positional argument: the directory to process.
+    cli.add_argument(
+        "input",
+        type=Path,
+        help="Path to directory of a ripped movie or show, already renamed to be easily identifiable.",
+    )
+
+    parsed = cli.parse_args()
+    return parsed
+
+
+if __name__ == "__main__":
+    # Script entry point: parse the command line and hand it to main().
+    main(args_parser())
--- a/src/match_episodes.py
+++ b/src/match_episodes.py
--- a/src/structures.py
+++ b/src/structures.py
@ -6,6 +6,37 @@ from os import path
 import subprocess
 import json

+import numpy as np
+
+
+@dataclass
+class ReferenceShowImages:
+    """Reference images of a show, grouped by season and then by episode."""
+
+    images: dict[str, dict[str, list[np.ndarray]]]
+    """All reference images sorted by Season > Episode > Images."""
+
+    def get_season(self, season: str) -> dict[str, list[np.ndarray]]:
+        """Return the episode-to-images mapping of one season.
+
+        The season identifier is converted with ``str()`` before the lookup.
+
+        Raises:
+            ValueError: If the season is not a key of ``images``.
+        """
+        season_key = str(season)
+        if season_key in self.images:
+            return self.images[season_key]
+        raise ValueError(f"Season [{season_key}] not found in reference images.")
+
+    def get_episode(self, season: str, episode: str) -> list[np.ndarray]:
+        """Return the images of one episode of one season.
+
+        The episode identifier is converted with ``str()`` before the lookup.
+
+        Raises:
+            ValueError: If the season or the episode is not found.
+        """
+        season_episodes = self.get_season(season)
+        episode_key = str(episode)
+        if episode_key in season_episodes:
+            return season_episodes[episode_key]
+        raise ValueError(
+            f"Episode [{episode_key}] not found in reference images for season [{season}]."
+        )
+
+    def flatten(self) -> dict[str, list[np.ndarray]]:
+        """Collapse seasons and episodes into one mapping keyed by a shared ID."""
+        flat = {}
+        for season, eps in self.images.items():
+            for episode, imgs in eps.items():
+                # Shared ID has the form "S[<season>]E[<episode>]".
+                flat[f"S[{season}]E[{episode}]"] = imgs
+        return flat
+

@dataclass
 class Resolution:
@ -178,10 +209,14 @@ class PathInfo:
    imdb: MovieBriefInfo
    """IMDB info object referencing media."""

+    tmdb_id: str | None
+    """TMDB id, or None if not identified."""
+
    category: PathCategory = PathCategory.UNCLASSIFIED
    """Category of path media."""

-    is_bluray_quality: bool = False
+    episodes: list[FileInfo] | None = None
+    """List of episodes if SHOW and episodes identified. Otherwise None."""

    files: list[FileInfo] = field(default_factory=list)
    """List of all files in the path."""
--- a/src/tmdb.py
+++ b/src/tmdb.py
@ -0,0 +1,101 @@
+import logging
+import os
+
+import cv2
+import numpy as np
+import requests
+
+from models import ReferenceShowImages
+
+# ==== CONFIGURATION ====
+# SECURITY: an API key committed to version control should be treated as
+# leaked. Supply the key through the TMDB_API_KEY environment variable; the
+# literal below is kept only as a backward-compatible fallback.
+TMDB_API_KEY = os.environ.get("TMDB_API_KEY", "b7006350eb3eeb4cf7d9cb6db44cdc0b")
+BASE_URL = "https://api.themoviedb.org/3"
+IMG_BASE = "https://image.tmdb.org/t/p/original"
+
+
+def tmdb_request(endpoint: str, params: dict | None = None):
+    """Query a TMDB API endpoint with authentication and return the decoded JSON.
+
+    Args:
+        endpoint: Path appended to ``BASE_URL``, e.g. ``"/search/tv"``.
+        params: Optional query parameters. The mapping is not modified.
+
+    Returns:
+        The JSON-decoded response body.
+
+    Raises:
+        requests.RequestException: On connection errors, timeouts or a
+            non-success HTTP status (via ``raise_for_status``).
+    """
+    # Build a fresh dict: a mutable default argument is shared between calls,
+    # and writing the key into the caller's dict would leak it back to them.
+    query = {**(params or {}), "api_key": TMDB_API_KEY}
+    # Without a timeout a stalled connection would block forever.
+    response = requests.get(f"{BASE_URL}{endpoint}", params=query, timeout=10)
+    response.raise_for_status()
+    return response.json()
+
+
+def search_show(imdb_id: str, title: str, year: int) -> str | None:
+    """Find the TMDB id of a show, by IMDb id first and by title/year as fallback.
+
+    Args:
+        imdb_id: IMDb identifier passed to TMDB's ``/find`` endpoint.
+        title: Show title used for the fallback ``/search/tv`` query.
+        year: First-air-date year used to narrow the fallback query.
+
+    Returns:
+        The TMDB id of the first matching show as a string, or None if
+        neither lookup produced a result.
+    """
+    # Try external source (IMDb ID). Best effort: any failure falls through
+    # to the title/year search below.
+    try:
+        res = tmdb_request(f"/find/{imdb_id}", {"external_source": "imdb_id"})
+        if res.get("tv_results"):
+            return str(res["tv_results"][0]["id"])
+    except Exception as e:
+        # The message needs a %s placeholder: passing `e` as a bare extra
+        # argument makes logging fail while formatting and the text is lost.
+        logging.warning(
+            "TMDB lookup with IMDB ID failed for SHOW, falling back to search: %s", e
+        )
+
+    # Fallback to title/year search
+    try:
+        res = tmdb_request("/search/tv", {"query": title, "first_air_date_year": year})
+    except requests.RequestException as e:
+        # Honour the documented contract: report the failure and return None.
+        logging.error(f"TMDB search failed for title [{title}] and year [{year}]: {e}")
+        return None
+    if res.get("results"):
+        return str(res["results"][0]["id"])
+
+    logging.error(f"Unable to find show for title [{title}] and year [{year}].")
+    return None
+
+
+def download_image(img_path: str) -> np.ndarray | None:
+    """Download a single image from TMDB and return it as a numpy array (BGR).
+
+    Args:
+        img_path: Image path as returned by TMDB; appended to ``IMG_BASE``.
+
+    Returns:
+        The image decoded with ``cv2.IMREAD_COLOR``, or None if the download
+        or the decoding failed (a warning is logged in both cases).
+    """
+    url = IMG_BASE + img_path
+    try:
+        # No stream=True: the whole body is read right away, and a streamed
+        # response keeps its connection checked out if raise_for_status()
+        # raises before the content is consumed.
+        r = requests.get(url, timeout=10)
+        r.raise_for_status()
+        img = cv2.imdecode(np.frombuffer(r.content, np.uint8), cv2.IMREAD_COLOR)
+    except Exception as e:
+        # Deliberately broad: a single bad image must not abort the whole run.
+        logging.warning(f"Failed to download image [{url}]: {e}")
+        return None
+
+    if img is None:
+        logging.warning(f"cv2.imdecode failed for downloaded image [{url}].")
+        return None
+    return img
+
+
+def download_episode_images(
+    tmdb_id: str, seasons: list[int] | None = None
+) -> ReferenceShowImages:
+    """Download the still images of every episode of a show, given a TMDB id.
+
+    Args:
+        tmdb_id: TMDB id of the show.
+        seasons: Optional season numbers to restrict the download to; when
+            None, all seasons listed by TMDB are fetched.
+
+    Returns:
+        The downloaded images grouped by season and episode. Keys are the
+        season and episode numbers converted to strings, which is what the
+        lookups of ``ReferenceShowImages`` expect. Images that failed to
+        download are left out.
+    """
+    show_details = tmdb_request(f"/tv/{tmdb_id}")
+
+    # Download images for seasons
+    season_episode_images: dict[str, dict[str, list[np.ndarray]]] = {}
+    for season in show_details.get("seasons", []):
+        season_number = season["season_number"]
+        if seasons is not None and season_number not in seasons:
+            continue
+
+        logging.info(f"Fetching season [{season_number}] images.")
+        season_details = tmdb_request(f"/tv/{tmdb_id}/season/{season_number}")
+
+        # Download images for episodes
+        episode_images: dict[str, list[np.ndarray]] = {}
+        for episode in season_details.get("episodes", []):
+            ep_num = episode["episode_number"]
+
+            # Fetch episode images
+            images = tmdb_request(
+                f"/tv/{tmdb_id}/season/{season_number}/episode/{ep_num}/images"
+            )
+            downloaded = (
+                download_image(still["file_path"])
+                for still in images.get("stills") or []
+            )
+            # String keys: ReferenceShowImages converts its lookup arguments
+            # with str(), so integer keys would never be found.
+            episode_images[str(ep_num)] = [img for img in downloaded if img is not None]
+
+        season_episode_images[str(season_number)] = episode_images
+
+    return ReferenceShowImages(season_episode_images)
+
+
+# if __name__ == "__main__":
+#     tv_id = search_show(IMDB_ID, TITLE, YEAR)
+#     if tv_id:
+#         print(f"Found TMDB TV ID: {tv_id}")
+#         download_episode_images(tv_id)
+#     else:
+#         print("Could not find show on TMDB.")
--- a/uv.lock
+++ b/uv.lock
@ -16,6 +16,15 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/78/b6/6307fbef88d9b5ee7421e68d78a9f162e0da4900bc5f5793f6d3d0e34fb8/annotated_types-0.7.0-py3-none-any.whl", hash = "sha256:1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53", size = 13643, upload-time = "2024-05-20T21:33:24.1Z" },
 ]

+[[package]]
+name = "certifi"
+version = "2025.8.3"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/dc/67/960ebe6bf230a96cda2e0abcf73af550ec4f090005363542f0765df162e0/certifi-2025.8.3.tar.gz", hash = "sha256:e564105f78ded564e3ae7c923924435e1daa7463faeab5bb932bc53ffae63407", size = 162386, upload-time = "2025-08-03T03:07:47.08Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/e5/48/1549795ba7742c948d2ad169c1c8cdbae65bc450d6cd753d124b17c8cd32/certifi-2025.8.3-py3-none-any.whl", hash = "sha256:f6c12493cfb1b06ba2ff328595af9350c65d6644968e5d3a2ffd78699af217a5", size = 161216, upload-time = "2025-08-03T03:07:45.777Z" },
+]
+
 [[package]]
 name = "charset-normalizer"
 version = "3.4.3"
@ -195,6 +204,15 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/04/4b/29cac41a4d98d144bf5f6d33995617b185d14b22401f75ca86f384e87ff1/h11-0.16.0-py3-none-any.whl", hash = "sha256:63cf8bbe7522de3bf65932fda1d9c2772064ffb3dae62d55932da54b31cb6c86", size = 37515, upload-time = "2025-04-24T03:35:24.344Z" },
 ]

+[[package]]
+name = "idna"
+version = "3.10"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/f1/70/7703c29685631f5a7590aa73f1f1d3fa9a380e654b86af429e0934a32f7d/idna-3.10.tar.gz", hash = "sha256:12f65c9b470abda6dc35cf8e63cc574b1c52b11df2c86030af0ac09b01b13ea9", size = 190490, upload-time = "2024-09-15T18:07:39.745Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/76/c6/c88e154df9c4e1a2a66ccf0005a88dfb2650c1dffb6f5ce603dfbd452ce3/idna-3.10-py3-none-any.whl", hash = "sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3", size = 70442, upload-time = "2024-09-15T18:07:37.964Z" },
+]
+
 [[package]]
 name = "imageio"
 version = "2.37.0"
@ -774,6 +792,7 @@ dependencies = [
    { name = "matplotlib" },
    { name = "numpy" },
    { name = "opencv-python" },
+    { name = "requests" },
    { name = "scikit-image" },
 ]

@ -783,9 +802,25 @@ requires-dist = [
    { name = "matplotlib", specifier = ">=3.10.6" },
    { name = "numpy", specifier = ">=2.3.2" },
    { name = "opencv-python", specifier = ">=4.11.0.86" },
+    { name = "requests", specifier = ">=2.32.5" },
    { name = "scikit-image", specifier = ">=0.25.2" },
 ]

+[[package]]
+name = "requests"
+version = "2.32.5"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "certifi" },
+    { name = "charset-normalizer" },
+    { name = "idna" },
+    { name = "urllib3" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/c9/74/b3ff8e6c8446842c3f5c837e9c3dfcfe2018ea6ecef224c710c85ef728f4/requests-2.32.5.tar.gz", hash = "sha256:dbba0bac56e100853db0ea71b82b4dfd5fe2bf6d3754a8893c3af500cec7d7cf", size = 134517, upload-time = "2025-08-18T20:46:02.573Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/1e/db/4254e3eabe8020b458f1a747140d32277ec7a271daf1d235b70dc0b4e6e3/requests-2.32.5-py3-none-any.whl", hash = "sha256:2462f94637a34fd532264295e186976db0f5d453d1cdd31473c85a6a161affb6", size = 64738, upload-time = "2025-08-18T20:46:00.542Z" },
+]
+
 [[package]]
 name = "scikit-image"
 version = "0.25.2"
@ -913,6 +948,15 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/17/69/cd203477f944c353c31bade965f880aa1061fd6bf05ded0726ca845b6ff7/typing_inspection-0.4.1-py3-none-any.whl", hash = "sha256:389055682238f53b04f7badcb49b989835495a96700ced5dab2d8feae4b26f51", size = 14552, upload-time = "2025-05-21T18:55:22.152Z" },
 ]

+[[package]]
+name = "urllib3"
+version = "2.5.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/15/22/9ee70a2574a4f4599c47dd506532914ce044817c7752a79b6a51286319bc/urllib3-2.5.0.tar.gz", hash = "sha256:3fc47733c7e419d4bc3f6b3dc2b4f890bb743906a30d56ba4a5bfa4bbff92760", size = 393185, upload-time = "2025-06-18T14:07:41.644Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/a7/c2/fe1e52489ae3122415c51f387e221dd0773709bad6c6cdaa599e8a2c5185/urllib3-2.5.0-py3-none-any.whl", hash = "sha256:e6b01673c0fa6a13e374b50871808eb3bf7046c4b125b216f6bf1cc604cff0dc", size = 129795, upload-time = "2025-06-18T14:07:40.39Z" },
+]
+
 [[package]]
 name = "urllib3-future"
 version = "2.13.908"