diff --git a/pyproject.toml b/pyproject.toml
index af508a1..924cb34 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -9,5 +9,6 @@ dependencies = [
     "matplotlib>=3.10.6",
     "numpy>=2.3.2",
     "opencv-python>=4.11.0.86",
+    "requests>=2.32.5",
     "scikit-image>=0.25.2",
 ]
diff --git a/src/file_classifier.py b/src/file_classifier.py
index a36b846..6f951d6 100644
--- a/src/file_classifier.py
+++ b/src/file_classifier.py
@@ -1,11 +1,14 @@
 import logging
 import imdbinfo as imdb
-from structures import PathInfo, FileInfo, PathCategory, FileCategory
+from models import PathInfo, FileInfo, PathCategory, FileCategory
 from pathlib import Path
 import os
 import re
 import math
 
+import tmdb
+
+
 def classify_show(info: PathInfo) -> PathInfo:
     # Gather meta information for identifying episodes
     episode_durations: set[int] = set()
@@ -77,8 +80,15 @@ def classify_files(path: str) -> PathInfo | None:
         return None
     logging.info(f"Found matching IMDB entry with id [{imdb_entry.imdb_id}].")
 
+    tmdb_id = tmdb.search_show(imdb_entry.id, imdb_entry.title, year)
+
     info = PathInfo(
-        path=p, title=title, year=year, imdb_id=imdb_entry.imdb_id, imdb=imdb_entry
+        path=p,
+        title=imdb_entry.title,
+        year=year,
+        imdb_id=imdb_entry.imdb_id,
+        imdb=imdb_entry,
+        tmdb_id=tmdb_id,
     )
 
     # Identify category
diff --git a/src/main.py b/src/main.py
new file mode 100644
index 0000000..10ad6f2
--- /dev/null
+++ b/src/main.py
@@ -0,0 +1,66 @@
+from pathlib import Path
+
+from file_classifier import classify_files
+from match_episodes import match_episodes_to_references
+import argparse
+import logging
+
+from models import PathCategory
+import tmdb
+
+
+def main(args: argparse.Namespace):
+    info = classify_files(args.input)
+
+    if info is None:
+        logging.error("Could not classify files.")
+        return
+
+    if info.category != PathCategory.SHOW:
+        logging.error(
+            f"Directory not recognized as SHOW, but as [{info.category}] instead. Only SHOW supported at the moment."
+        )
+        return
+
+    # ==== Process SHOW ====
+    if info.episodes is None:
+        logging.error(
+            "Episodes could not be identified, no reference matching possible."
+        )
+        return
+
+    if info.tmdb_id is None:
+        logging.error("TMDB entry not identified, cannot find reference images.")
+        return
+
+    # Match episodes to references
+    references = tmdb.download_episode_images(info.tmdb_id)
+    matches = match_episodes_to_references(
+        [str(f.path.absolute()) for f in info.episodes], references.flatten()
+    )
+
+    # Set new episode names
+    # TODO: Resolve matching results
+
+    # Rename files
+    # TODO: Rename files
+
+    logging.info(f"Finished processing [{info.path}].")
+
+
+def args_parser() -> argparse.Namespace:
+    parser = argparse.ArgumentParser(
+        description="Automatic renamer for ripped DVD and Blu-Ray files."
+    )
+    parser.add_argument(
+        "input",
+        type=Path,
+        help="Path to directory of a ripped movie or show, already renamed to be easily identifiable.",
+    )
+
+    return parser.parse_args()
+
+
+if __name__ == "__main__":
+    args = args_parser()
+    main(args)
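Note: the new entry point takes a single positional `input` path, so a typical invocation (the directory name here is only an example) would be `python src/main.py "/rips/Some Show (2004)"`; matching resolution and the actual renaming are still left as TODOs above.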
diff --git a/src/label_episodes.py b/src/match_episodes.py
similarity index 100%
rename from src/label_episodes.py
rename to src/match_episodes.py
diff --git a/src/structures.py b/src/models.py
similarity index 78%
rename from src/structures.py
rename to src/models.py
index 8cf63c5..36a61bf 100644
--- a/src/structures.py
+++ b/src/models.py
@@ -6,6 +6,37 @@ from os import path
 import subprocess
 import json
 
+import numpy as np
+
+
+@dataclass
+class ReferenceShowImages:
+    images: dict[str, dict[str, list[np.ndarray]]]
+    """All reference images sorted by Season > Episode > Images."""
+
+    def get_season(self, season: str) -> dict[str, list[np.ndarray]]:
+        season = str(season)
+        if season not in self.images.keys():
+            raise ValueError(f"Season [{season}] not found in reference images.")
+        return self.images[season]
+
+    def get_episode(self, season: str, episode: str) -> list[np.ndarray]:
+        episodes = self.get_season(season)
+        episode = str(episode)
+        if episode not in episodes.keys():
+            raise ValueError(
+                f"Episode [{episode}] not found in reference images for season [{season}]."
+            )
+        return episodes[episode]
+
+    def flatten(self) -> dict[str, list[np.ndarray]]:
+        """Collapse Seasons and episodes into a shared ID."""
+        return {
+            f"S[{season}]E[{episode}]": imgs
+            for season, eps in self.images.items()
+            for episode, imgs in eps.items()
+        }
+
 
 @dataclass
 class Resolution:
@@ -178,10 +209,14 @@ class PathInfo:
     imdb: MovieBriefInfo
     """IMDB info object referencing media."""
 
+    tmdb_id: str | None
+    """TMDB id, or None if not identified."""
+
     category: PathCategory = PathCategory.UNCLASSIFIED
     """Category of path media."""
 
-    is_bluray_quality: bool = False
+    episodes: list[FileInfo] | None = None
+    """List of episodes if SHOW and episodes identified. Otherwise None."""
 
     files: list[FileInfo] = field(default_factory=list)
     """List of all files in the path."""
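For reference, a minimal sketch of how the new ReferenceShowImages container is meant to behave; the season/episode keys and the dummy image below are made up for illustration:

    import numpy as np
    from models import ReferenceShowImages

    # One hypothetical season with two episodes, each holding a single dummy still.
    dummy = np.zeros((10, 10, 3), dtype=np.uint8)
    refs = ReferenceShowImages(images={"1": {"1": [dummy], "2": [dummy]}})
    refs.get_episode("1", "2")      # -> [dummy]
    sorted(refs.flatten().keys())   # -> ['S[1]E[1]', 'S[1]E[2]']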
diff --git a/src/tmdb.py b/src/tmdb.py
new file mode 100644
index 0000000..29e3c3a
--- /dev/null
+++ b/src/tmdb.py
@@ -0,0 +1,101 @@
+import cv2
+import numpy as np
+import requests
+import logging
+
+from models import ReferenceShowImages
+
+# ==== CONFIGURATION ====
+TMDB_API_KEY = "b7006350eb3eeb4cf7d9cb6db44cdc0b"  # <-- Replace with your TMDB API key
+BASE_URL = "https://api.themoviedb.org/3"
+IMG_BASE = "https://image.tmdb.org/t/p/original"
+
+
+def tmdb_request(endpoint: str, params: dict = {}):
+    """Helper to query TMDB API with authentication."""
+    params["api_key"] = TMDB_API_KEY
+    response = requests.get(f"{BASE_URL}{endpoint}", params=params)
+    response.raise_for_status()
+    return response.json()
+
+
+def search_show(imdb_id: str, title: str, year: int) -> str | None:
+    """Find TMDB ID by IMDb ID first, fallback to title/year. Returns TMDB id if successful, otherwise None."""
+    # Try external source (IMDb ID)
+    try:
+        res = tmdb_request("/find/" + imdb_id, {"external_source": "imdb_id"})
+        if res.get("tv_results"):
+            return res["tv_results"][0]["id"]
+    except Exception as e:
+        logging.warning(
+            f"TMDB lookup with IMDB ID failed for SHOW, falling back to search: {e}"
+        )
+
+    # Fallback to title/year search
+    res = tmdb_request("/search/tv", {"query": title, "first_air_date_year": year})
+    if res.get("results"):
+        return res["results"][0]["id"]
+
+    logging.error(f"Unable to find show for title [{title}] and year [{year}].")
+    return None
+
+
+def download_image(img_path: str) -> np.ndarray | None:
+    """Download a single image from TMDB and return as numpy array (BGR). On error returns None."""
+    url = IMG_BASE + img_path
+    try:
+        r = requests.get(url, stream=True, timeout=10)
+        r.raise_for_status()
+        img_array = np.frombuffer(r.content, np.uint8)
+        img = cv2.imdecode(img_array, cv2.IMREAD_COLOR)
+        if img is None:
+            logging.warning(f"cv2.imdecode failed for downloaded image [{url}].")
+            return None
+        return img
+    except Exception as e:
+        logging.warning(f"Failed to download image [{url}]: {e}")
+        return None
+
+
+def download_episode_images(
+    tmdb_id: str, seasons: list[int] | None = None
+) -> ReferenceShowImages:
+    """Loop through all seasons and episodes, downloading images. Given a TMDB id."""
+    show_details = tmdb_request(f"/tv/{tmdb_id}")
+
+    # Download images for seasons
+    season_episode_images = {}
+    for season in show_details.get("seasons", []):
+        season_number = season["season_number"]
+        if seasons is not None and season_number not in seasons:
+            continue
+
+        logging.info(f"Fetching season [{season_number}] images.")
+        season_episode_images[season_number] = {}
+        season_details = tmdb_request(f"/tv/{tmdb_id}/season/{season_number}")
+
+        # Download images for episodes
+        for episode in season_details.get("episodes", []):
+            ep_num = episode["episode_number"]
+            season_episode_images[season_number][ep_num] = []
+
+            # Fetch episode images
+            images = tmdb_request(
+                f"/tv/{tmdb_id}/season/{season_number}/episode/{ep_num}/images"
+            )
+
+            for idx, still in enumerate(images.get("stills", [])):
+                image = download_image(still["file_path"])
+                if image is not None:
+                    season_episode_images[season_number][ep_num].append(image)
+
+    return ReferenceShowImages(season_episode_images)
+
+
+# if __name__ == "__main__":
+#     tv_id = search_show(IMDB_ID, TITLE, YEAR)
+#     if tv_id:
+#         print(f"Found TMDB TV ID: {tv_id}")
+#         download_episode_images(tv_id)
+#     else:
+#         print("Could not find show on TMDB.")
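For context, the tmdb helpers can also be exercised on their own, roughly as the commented-out block at the bottom of the module hints; the IMDb id, title, and year below are placeholders and a valid TMDB_API_KEY is assumed:

    import tmdb

    # Hypothetical show; search_show falls back to a title/year search if the IMDb lookup fails.
    tv_id = tmdb.search_show("tt0000000", "Some Show", 2004)
    if tv_id is not None:
        # Limit the download to season 1 to keep the example small.
        refs = tmdb.download_episode_images(tv_id, seasons=[1])
        print(sorted(refs.flatten().keys()))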
"https://files.pythonhosted.org/packages/e5/48/1549795ba7742c948d2ad169c1c8cdbae65bc450d6cd753d124b17c8cd32/certifi-2025.8.3-py3-none-any.whl", hash = "sha256:f6c12493cfb1b06ba2ff328595af9350c65d6644968e5d3a2ffd78699af217a5", size = 161216, upload-time = "2025-08-03T03:07:45.777Z" }, +] + [[package]] name = "charset-normalizer" version = "3.4.3" @@ -195,6 +204,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/04/4b/29cac41a4d98d144bf5f6d33995617b185d14b22401f75ca86f384e87ff1/h11-0.16.0-py3-none-any.whl", hash = "sha256:63cf8bbe7522de3bf65932fda1d9c2772064ffb3dae62d55932da54b31cb6c86", size = 37515, upload-time = "2025-04-24T03:35:24.344Z" }, ] +[[package]] +name = "idna" +version = "3.10" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f1/70/7703c29685631f5a7590aa73f1f1d3fa9a380e654b86af429e0934a32f7d/idna-3.10.tar.gz", hash = "sha256:12f65c9b470abda6dc35cf8e63cc574b1c52b11df2c86030af0ac09b01b13ea9", size = 190490, upload-time = "2024-09-15T18:07:39.745Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/76/c6/c88e154df9c4e1a2a66ccf0005a88dfb2650c1dffb6f5ce603dfbd452ce3/idna-3.10-py3-none-any.whl", hash = "sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3", size = 70442, upload-time = "2024-09-15T18:07:37.964Z" }, +] + [[package]] name = "imageio" version = "2.37.0" @@ -774,6 +792,7 @@ dependencies = [ { name = "matplotlib" }, { name = "numpy" }, { name = "opencv-python" }, + { name = "requests" }, { name = "scikit-image" }, ] @@ -783,9 +802,25 @@ requires-dist = [ { name = "matplotlib", specifier = ">=3.10.6" }, { name = "numpy", specifier = ">=2.3.2" }, { name = "opencv-python", specifier = ">=4.11.0.86" }, + { name = "requests", specifier = ">=2.32.5" }, { name = "scikit-image", specifier = ">=0.25.2" }, ] +[[package]] +name = "requests" +version = "2.32.5" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "certifi" }, + { name = "charset-normalizer" }, + { name = "idna" }, + { name = "urllib3" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/c9/74/b3ff8e6c8446842c3f5c837e9c3dfcfe2018ea6ecef224c710c85ef728f4/requests-2.32.5.tar.gz", hash = "sha256:dbba0bac56e100853db0ea71b82b4dfd5fe2bf6d3754a8893c3af500cec7d7cf", size = 134517, upload-time = "2025-08-18T20:46:02.573Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/1e/db/4254e3eabe8020b458f1a747140d32277ec7a271daf1d235b70dc0b4e6e3/requests-2.32.5-py3-none-any.whl", hash = "sha256:2462f94637a34fd532264295e186976db0f5d453d1cdd31473c85a6a161affb6", size = 64738, upload-time = "2025-08-18T20:46:00.542Z" }, +] + [[package]] name = "scikit-image" version = "0.25.2" @@ -913,6 +948,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/17/69/cd203477f944c353c31bade965f880aa1061fd6bf05ded0726ca845b6ff7/typing_inspection-0.4.1-py3-none-any.whl", hash = "sha256:389055682238f53b04f7badcb49b989835495a96700ced5dab2d8feae4b26f51", size = 14552, upload-time = "2025-05-21T18:55:22.152Z" }, ] +[[package]] +name = "urllib3" +version = "2.5.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/15/22/9ee70a2574a4f4599c47dd506532914ce044817c7752a79b6a51286319bc/urllib3-2.5.0.tar.gz", hash = "sha256:3fc47733c7e419d4bc3f6b3dc2b4f890bb743906a30d56ba4a5bfa4bbff92760", size = 393185, upload-time = "2025-06-18T14:07:41.644Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/a7/c2/fe1e52489ae3122415c51f387e221dd0773709bad6c6cdaa599e8a2c5185/urllib3-2.5.0-py3-none-any.whl", hash = "sha256:e6b01673c0fa6a13e374b50871808eb3bf7046c4b125b216f6bf1cc604cff0dc", size = 129795, upload-time = "2025-06-18T14:07:40.39Z" }, +] + [[package]] name = "urllib3-future" version = "2.13.908"