Refactoring; implemented TMDB reference image download and the main entry point
parent fc546b2741
commit fd9652bdec
7 changed files with 260 additions and 3 deletions
pyproject.toml
@@ -9,5 +9,6 @@ dependencies = [
     "matplotlib>=3.10.6",
     "numpy>=2.3.2",
     "opencv-python>=4.11.0.86",
+    "requests>=2.32.5",
     "scikit-image>=0.25.2",
 ]
src/file_classifier.py
@@ -1,11 +1,14 @@
 import logging
 import imdbinfo as imdb
-from structures import PathInfo, FileInfo, PathCategory, FileCategory
+from models import PathInfo, FileInfo, PathCategory, FileCategory
 from pathlib import Path
 import os
 import re
 import math
+
+import tmdb
+
 
 def classify_show(info: PathInfo) -> PathInfo:
     # Gather meta information for identifying episodes
     episode_durations: set[int] = set()
@@ -77,8 +80,15 @@ def classify_files(path: str) -> PathInfo | None:
         return None
     logging.info(f"Found matching IMDB entry with id [{imdb_entry.imdb_id}].")
 
+    tmdb_id = tmdb.search_show(imdb_entry.id, imdb_entry.title, year)
+
     info = PathInfo(
-        path=p, title=title, year=year, imdb_id=imdb_entry.imdb_id, imdb=imdb_entry
+        path=p,
+        title=imdb_entry.title,
+        year=year,
+        imdb_id=imdb_entry.imdb_id,
+        imdb=imdb_entry,
+        tmdb_id=tmdb_id,
     )
 
     # Identify category
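Below is a minimal, hedged sketch of driving the updated classifier, assuming classify_files takes a path string as in the hunk header above; the directory name is a hypothetical example.

from file_classifier import classify_files

# Hypothetical pre-renamed rip directory.
info = classify_files("/media/rips/Some Show (2001)")
if info is not None:
    # tmdb_id is the new field populated via tmdb.search_show() in this commit.
    print(info.title, info.year, info.imdb_id, info.tmdb_id)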
src/main.py (new file, 66 additions)
@@ -0,0 +1,66 @@
+from pathlib import Path
+
+from file_classifier import classify_files
+from match_episodes import match_episodes_to_references
+import argparse
+import logging
+
+from models import PathCategory
+import tmdb
+
+
+def main(args: argparse.Namespace):
+    info = classify_files(args.input)
+
+    if info is None:
+        logging.error("Could not classify files.")
+        return
+
+    if info.category != PathCategory.SHOW:
+        logging.error(
+            f"Directory not recognized as SHOW, but as [{info.category}] instead. Only SHOW supported at the moment."
+        )
+        return
+
+    # ==== Process SHOW ====
+    if info.episodes is None:
+        logging.error(
+            "Episodes could not be identified, no reference matching possible."
+        )
+        return
+
+    if info.tmdb_id is None:
+        logging.error("TMDB entry not identified, cannot find reference images.")
+        return
+
+    # Match episodes to references
+    references = tmdb.download_episode_images(info.tmdb_id)
+    matches = match_episodes_to_references(
+        [str(f.path.absolute()) for f in info.episodes], references.flatten()
+    )
+
+    # Set new episode names
+    # TODO: Resolve matching results
+
+    # Rename files
+    # TODO: Rename files
+
+    logging.info(f"Finished processing [{info.path}].")
+
+
+def args_parser() -> argparse.Namespace:
+    parser = argparse.ArgumentParser(
+        description="Automatic renamer for ripped DVD and Blu-Ray files."
+    )
+    parser.add_argument(
+        "input",
+        type=Path,
+        help="Path to directory of a ripped movie or show, already renamed to be easily identifiable.",
+    )
+
+    return parser.parse_args()
+
+
+if __name__ == "__main__":
+    args = args_parser()
+    main(args)
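A hedged usage sketch for the new entry point, assuming it is run from the src/ directory so the sibling modules resolve; the input path is a made-up example.

# Roughly equivalent to: python main.py "/media/rips/Some Show (2001)"
import argparse
import logging
from pathlib import Path

from main import main

logging.basicConfig(level=logging.INFO)
main(argparse.Namespace(input=Path("/media/rips/Some Show (2001)")))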
src/models.py
@@ -6,6 +6,37 @@ from os import path
 import subprocess
 import json
 
+import numpy as np
+
+
+@dataclass
+class ReferenceShowImages:
+    images: dict[str, dict[str, list[np.ndarray]]]
+    """All reference images sorted by Season > Episode > Images."""
+
+    def get_season(self, season: str) -> dict[str, list[np.ndarray]]:
+        season = str(season)
+        if season not in self.images.keys():
+            raise ValueError(f"Season [{season}] not found in reference images.")
+        return self.images[season]
+
+    def get_episode(self, season: str, episode: str) -> list[np.ndarray]:
+        episodes = self.get_season(season)
+        episode = str(episode)
+        if episode not in episodes.keys():
+            raise ValueError(
+                f"Episode [{episode}] not found in reference images for season [{season}]."
+            )
+        return episodes[episode]
+
+    def flatten(self) -> dict[str, list[np.ndarray]]:
+        """Collapse seasons and episodes into a shared ID."""
+        return {
+            f"S[{season}]E[{episode}]": imgs
+            for season, eps in self.images.items()
+            for episode, imgs in eps.items()
+        }
+
 
 @dataclass
 class Resolution:
@@ -178,10 +209,14 @@ class PathInfo:
     imdb: MovieBriefInfo
     """IMDB info object referencing media."""
 
+    tmdb_id: str | None
+    """TMDB id, or None if not identified."""
+
     category: PathCategory = PathCategory.UNCLASSIFIED
     """Category of path media."""
 
-    is_bluray_quality: bool = False
+    episodes: list[FileInfo] | None = None
+    """List of episodes if SHOW and episodes identified. Otherwise None."""
 
     files: list[FileInfo] = field(default_factory=list)
     """List of all files in the path."""
src/tmdb.py (new file, 101 additions)
@@ -0,0 +1,101 @@
+import cv2
+import numpy as np
+import requests
+import logging
+
+from models import ReferenceShowImages
+
+# ==== CONFIGURATION ====
+TMDB_API_KEY = "b7006350eb3eeb4cf7d9cb6db44cdc0b"  # <-- Replace with your TMDB API key
+BASE_URL = "https://api.themoviedb.org/3"
+IMG_BASE = "https://image.tmdb.org/t/p/original"
+
+
+def tmdb_request(endpoint: str, params: dict = {}):
+    """Helper to query the TMDB API with authentication."""
+    params["api_key"] = TMDB_API_KEY
+    response = requests.get(f"{BASE_URL}{endpoint}", params=params)
+    response.raise_for_status()
+    return response.json()
+
+
+def search_show(imdb_id: str, title: str, year: int) -> str | None:
+    """Find the TMDB ID by IMDb ID first, falling back to title/year. Returns the TMDB id if successful, otherwise None."""
+    # Try external source (IMDb ID)
+    try:
+        res = tmdb_request("/find/" + imdb_id, {"external_source": "imdb_id"})
+        if res.get("tv_results"):
+            return res["tv_results"][0]["id"]
+    except Exception as e:
+        logging.warning(
+            f"TMDB lookup with IMDB ID failed for SHOW, falling back to search: {e}"
+        )
+
+    # Fallback to title/year search
+    res = tmdb_request("/search/tv", {"query": title, "first_air_date_year": year})
+    if res.get("results"):
+        return res["results"][0]["id"]
+
+    logging.error(f"Unable to find show for title [{title}] and year [{year}].")
+    return None
+
+
+def download_image(img_path: str) -> np.ndarray | None:
+    """Download a single image from TMDB and return it as a numpy array (BGR). On error, returns None."""
+    url = IMG_BASE + img_path
+    try:
+        r = requests.get(url, stream=True, timeout=10)
+        r.raise_for_status()
+        img_array = np.frombuffer(r.content, np.uint8)
+        img = cv2.imdecode(img_array, cv2.IMREAD_COLOR)
+        if img is None:
+            logging.warning(f"cv2.imdecode failed for downloaded image [{url}].")
+            return None
+        return img
+    except Exception as e:
+        logging.warning(f"Failed to download image [{url}]: {e}")
+        return None
+
+
+def download_episode_images(
+    tmdb_id: str, seasons: list[int] | None = None
+) -> ReferenceShowImages:
+    """Loop through all seasons and episodes of a given TMDB id, downloading images."""
+    show_details = tmdb_request(f"/tv/{tmdb_id}")
+
+    # Download images for seasons
+    season_episode_images = {}
+    for season in show_details.get("seasons", []):
+        season_number = season["season_number"]
+        if seasons is not None and season_number not in seasons:
+            continue
+
+        logging.info(f"Fetching season [{season_number}] images.")
+        season_episode_images[season_number] = {}
+        season_details = tmdb_request(f"/tv/{tmdb_id}/season/{season_number}")
+
+        # Download images for episodes
+        for episode in season_details.get("episodes", []):
+            ep_num = episode["episode_number"]
+            season_episode_images[season_number][ep_num] = []
+
+            # Fetch episode images
+            images = tmdb_request(
+                f"/tv/{tmdb_id}/season/{season_number}/episode/{ep_num}/images"
+            )
+
+            for idx, still in enumerate(images.get("stills", [])):
+                image = download_image(still["file_path"])
+                if image is not None:
+                    season_episode_images[season_number][ep_num].append(image)
+
+    return ReferenceShowImages(season_episode_images)
+
+
+# if __name__ == "__main__":
+#     tv_id = search_show(IMDB_ID, TITLE, YEAR)
+#     if tv_id:
+#         print(f"Found TMDB TV ID: {tv_id}")
+#         download_episode_images(tv_id)
+#     else:
+#         print("Could not find show on TMDB.")
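A hedged sketch of the intended download flow in the new module, assuming a valid TMDB API key is configured in tmdb.py; the IMDb ID, title, and year are illustrative examples.

import tmdb

# Resolve the TMDB id, trying the IMDb ID first and falling back to a title/year search.
tv_id = tmdb.search_show("tt0903747", "Breaking Bad", 2008)
if tv_id is not None:
    # Limit to season 1 to keep the number of image requests small.
    references = tmdb.download_episode_images(tv_id, seasons=[1])
    for episode_key, stills in references.flatten().items():
        print(episode_key, len(stills))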
uv.lock (44 additions)
@@ -16,6 +16,15 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/78/b6/6307fbef88d9b5ee7421e68d78a9f162e0da4900bc5f5793f6d3d0e34fb8/annotated_types-0.7.0-py3-none-any.whl", hash = "sha256:1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53", size = 13643, upload-time = "2024-05-20T21:33:24.1Z" },
 ]
 
+[[package]]
+name = "certifi"
+version = "2025.8.3"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/dc/67/960ebe6bf230a96cda2e0abcf73af550ec4f090005363542f0765df162e0/certifi-2025.8.3.tar.gz", hash = "sha256:e564105f78ded564e3ae7c923924435e1daa7463faeab5bb932bc53ffae63407", size = 162386, upload-time = "2025-08-03T03:07:47.08Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/e5/48/1549795ba7742c948d2ad169c1c8cdbae65bc450d6cd753d124b17c8cd32/certifi-2025.8.3-py3-none-any.whl", hash = "sha256:f6c12493cfb1b06ba2ff328595af9350c65d6644968e5d3a2ffd78699af217a5", size = 161216, upload-time = "2025-08-03T03:07:45.777Z" },
+]
+
 [[package]]
 name = "charset-normalizer"
 version = "3.4.3"
@@ -195,6 +204,15 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/04/4b/29cac41a4d98d144bf5f6d33995617b185d14b22401f75ca86f384e87ff1/h11-0.16.0-py3-none-any.whl", hash = "sha256:63cf8bbe7522de3bf65932fda1d9c2772064ffb3dae62d55932da54b31cb6c86", size = 37515, upload-time = "2025-04-24T03:35:24.344Z" },
 ]
 
+[[package]]
+name = "idna"
+version = "3.10"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/f1/70/7703c29685631f5a7590aa73f1f1d3fa9a380e654b86af429e0934a32f7d/idna-3.10.tar.gz", hash = "sha256:12f65c9b470abda6dc35cf8e63cc574b1c52b11df2c86030af0ac09b01b13ea9", size = 190490, upload-time = "2024-09-15T18:07:39.745Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/76/c6/c88e154df9c4e1a2a66ccf0005a88dfb2650c1dffb6f5ce603dfbd452ce3/idna-3.10-py3-none-any.whl", hash = "sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3", size = 70442, upload-time = "2024-09-15T18:07:37.964Z" },
+]
+
 [[package]]
 name = "imageio"
 version = "2.37.0"
@@ -774,6 +792,7 @@ dependencies = [
     { name = "matplotlib" },
     { name = "numpy" },
     { name = "opencv-python" },
+    { name = "requests" },
     { name = "scikit-image" },
 ]
 
@@ -783,9 +802,25 @@ requires-dist = [
     { name = "matplotlib", specifier = ">=3.10.6" },
     { name = "numpy", specifier = ">=2.3.2" },
     { name = "opencv-python", specifier = ">=4.11.0.86" },
+    { name = "requests", specifier = ">=2.32.5" },
     { name = "scikit-image", specifier = ">=0.25.2" },
 ]
 
+[[package]]
+name = "requests"
+version = "2.32.5"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "certifi" },
+    { name = "charset-normalizer" },
+    { name = "idna" },
+    { name = "urllib3" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/c9/74/b3ff8e6c8446842c3f5c837e9c3dfcfe2018ea6ecef224c710c85ef728f4/requests-2.32.5.tar.gz", hash = "sha256:dbba0bac56e100853db0ea71b82b4dfd5fe2bf6d3754a8893c3af500cec7d7cf", size = 134517, upload-time = "2025-08-18T20:46:02.573Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/1e/db/4254e3eabe8020b458f1a747140d32277ec7a271daf1d235b70dc0b4e6e3/requests-2.32.5-py3-none-any.whl", hash = "sha256:2462f94637a34fd532264295e186976db0f5d453d1cdd31473c85a6a161affb6", size = 64738, upload-time = "2025-08-18T20:46:00.542Z" },
+]
+
 [[package]]
 name = "scikit-image"
 version = "0.25.2"
@@ -913,6 +948,15 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/17/69/cd203477f944c353c31bade965f880aa1061fd6bf05ded0726ca845b6ff7/typing_inspection-0.4.1-py3-none-any.whl", hash = "sha256:389055682238f53b04f7badcb49b989835495a96700ced5dab2d8feae4b26f51", size = 14552, upload-time = "2025-05-21T18:55:22.152Z" },
 ]
 
+[[package]]
+name = "urllib3"
+version = "2.5.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/15/22/9ee70a2574a4f4599c47dd506532914ce044817c7752a79b6a51286319bc/urllib3-2.5.0.tar.gz", hash = "sha256:3fc47733c7e419d4bc3f6b3dc2b4f890bb743906a30d56ba4a5bfa4bbff92760", size = 393185, upload-time = "2025-06-18T14:07:41.644Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/a7/c2/fe1e52489ae3122415c51f387e221dd0773709bad6c6cdaa599e8a2c5185/urllib3-2.5.0-py3-none-any.whl", hash = "sha256:e6b01673c0fa6a13e374b50871808eb3bf7046c4b125b216f6bf1cc604cff0dc", size = 129795, upload-time = "2025-06-18T14:07:40.39Z" },
+]
+
 [[package]]
 name = "urllib3-future"
 version = "2.13.908"