Refactored some code and implemented the TMDB reference image download and the main file

This commit is contained in:
Maximilian Giller 2025-09-17 22:56:44 +02:00
parent fc546b2741
commit fd9652bdec
7 changed files with 260 additions and 3 deletions

View file

@ -9,5 +9,6 @@ dependencies = [
"matplotlib>=3.10.6",
"numpy>=2.3.2",
"opencv-python>=4.11.0.86",
"requests>=2.32.5",
"scikit-image>=0.25.2",
]

View file

@ -1,11 +1,14 @@
import logging
import imdbinfo as imdb
from structures import PathInfo, FileInfo, PathCategory, FileCategory
from models import PathInfo, FileInfo, PathCategory, FileCategory
from pathlib import Path
import os
import re
import math
import tmdb
def classify_show(info: PathInfo) -> PathInfo:
# Gather meta information for identifying episodes
episode_durations: set[int] = set()
@ -77,8 +80,15 @@ def classify_files(path: str) -> PathInfo | None:
return None
logging.info(f"Found matching IMDB entry with id [{imdb_entry.imdb_id}].")
tmdb_id = tmdb.search_show(imdb_entry.id, imdb_entry.title, year)
info = PathInfo(
path=p, title=title, year=year, imdb_id=imdb_entry.imdb_id, imdb=imdb_entry
path=p,
title=imdb_entry.title,
year=year,
imdb_id=imdb_entry.imdb_id,
imdb=imdb_entry,
tmdb_id=tmdb_id,
)
# Identify category

66
src/main.py Normal file
View file

@ -0,0 +1,66 @@
from pathlib import Path
from file_classifier import classify_files
from match_episodes import match_episodes_to_references
import argparse
import logging
from models import PathCategory
import tmdb
def main(args: argparse.Namespace):
    """Classify the input directory and match its episodes to TMDB reference images.

    Only directories classified as SHOW are processed. Each unmet
    precondition (no classification, wrong category, no episodes, no TMDB id)
    is logged as an error and ends the run early. Resolving the matching
    result and renaming the files are still open TODOs.

    Args:
        args: Parsed command line arguments; ``args.input`` is handed to
            ``classify_files``.
    """
    path_info = classify_files(args.input)
    if path_info is None:
        logging.error("Could not classify files.")
        return

    if path_info.category != PathCategory.SHOW:
        logging.error(
            f"Directory not recognized as SHOW, but as [{path_info.category}] instead. Only SHOW supported at the moment."
        )
        return

    # ==== Process SHOW ====
    episode_files = path_info.episodes
    if episode_files is None:
        logging.error(
            "Episodes could not be identified, no reference matching possible."
        )
        return

    show_id = path_info.tmdb_id
    if show_id is None:
        logging.error("TMDB entry not identified, cannot find reference images.")
        return

    # Match episodes to references
    reference_images = tmdb.download_episode_images(show_id)
    episode_paths = [str(episode.path.absolute()) for episode in episode_files]
    matches = match_episodes_to_references(episode_paths, reference_images.flatten())

    # Set new episode names
    # TODO: Resolve matching results

    # Rename files
    # TODO: Rename files

    logging.info(f"Finished processing [{path_info.path}].")
def args_parser() -> argparse.Namespace:
    """Parse the command line and return the resulting namespace.

    The only argument is the positional ``input``, converted to a ``Path``.
    """
    cli = argparse.ArgumentParser(
        description="Automatic renamer for ripped DVD and Blu-Ray files."
    )
    input_help = "Path to directory of a ripped movie or show, already renamed to be easily identifiable."
    cli.add_argument("input", help=input_help, type=Path)
    return cli.parse_args()
# Script entry point: parse the command line and hand it straight to main().
if __name__ == "__main__":
    main(args_parser())

View file

@ -6,6 +6,37 @@ from os import path
import subprocess
import json
import numpy as np
@dataclass
class ReferenceShowImages:
    """Reference images of a show, addressable by season and episode."""

    images: dict[str, dict[str, list[np.ndarray]]]
    """All reference images sorted by Season > Episode > Images."""

    def get_season(self, season: str) -> dict[str, list[np.ndarray]]:
        """Return the episode-to-images mapping of one season.

        The season is converted with ``str()`` before the lookup.

        Raises:
            ValueError: If the season is not present in ``images``.
        """
        season_key = str(season)
        if season_key in self.images:
            return self.images[season_key]
        raise ValueError(f"Season [{season_key}] not found in reference images.")

    def get_episode(self, season: str, episode: str) -> list[np.ndarray]:
        """Return the images of one episode of one season.

        The episode is converted with ``str()`` before the lookup.

        Raises:
            ValueError: If the season or the episode is not present.
        """
        season_episodes = self.get_season(season)
        episode_key = str(episode)
        if episode_key in season_episodes:
            return season_episodes[episode_key]
        raise ValueError(
            f"Episode [{episode_key}] not found in reference images for season [{season}]."
        )

    def flatten(self) -> dict[str, list[np.ndarray]]:
        """Collapse Seasons and episodes into a shared ID."""
        flat: dict[str, list[np.ndarray]] = {}
        for season_key, season_episodes in self.images.items():
            for episode_key, episode_images in season_episodes.items():
                flat[f"S[{season_key}]E[{episode_key}]"] = episode_images
        return flat
@dataclass
class Resolution:
@ -178,10 +209,14 @@ class PathInfo:
imdb: MovieBriefInfo
"""IMDB info object referencing media."""
tmdb_id: str | None
"""TMDB id, or None if not identified."""
category: PathCategory = PathCategory.UNCLASSIFIED
"""Category of path media."""
is_bluray_quality: bool = False
episodes: list[FileInfo] | None = None
"""List of episodes if SHOW and episodes identified. Otherwise None."""
files: list[FileInfo] = field(default_factory=list)
"""List of all files in the path."""

101
src/tmdb.py Normal file
View file

@ -0,0 +1,101 @@
import logging
import os

import cv2
import numpy as np
import requests

from models import ReferenceShowImages
# ==== CONFIGURATION ====
# The API key is taken from the TMDB_API_KEY environment variable when it is
# set (and non-empty), so a personal key can be used without editing the
# source. The literal below is only the fallback.
# NOTE(review): a key committed to version control should be treated as
# public — consider rotating it and dropping the fallback.
TMDB_API_KEY = os.environ.get("TMDB_API_KEY") or "b7006350eb3eeb4cf7d9cb6db44cdc0b"
BASE_URL = "https://api.themoviedb.org/3"
IMG_BASE = "https://image.tmdb.org/t/p/original"
def tmdb_request(endpoint: str, params: dict | None = None):
    """Query the TMDB API with authentication and return the decoded JSON.

    Args:
        endpoint: API path appended to ``BASE_URL`` (e.g. ``"/search/tv"``).
        params: Optional query parameters. The mapping is not modified.

    Returns:
        The JSON body of the response as returned by ``response.json()``.

    Raises:
        requests.HTTPError: If the response status signals an error.
        requests.RequestException: On connection problems or timeout.
    """
    # Build a fresh dict per call: a shared ``{}`` default would be mutated
    # across calls, and adding the key in place would leak it into the
    # caller's own dict.
    query = {**(params or {}), "api_key": TMDB_API_KEY}
    # Same timeout as download_image, so a stalled connection cannot hang forever.
    response = requests.get(f"{BASE_URL}{endpoint}", params=query, timeout=10)
    response.raise_for_status()
    return response.json()
def search_show(imdb_id: str, title: str, year: int) -> str | None:
    """Find the TMDB id of a show, by IMDb id first and title/year second.

    Args:
        imdb_id: IMDb id of the show, with or without the ``tt`` prefix.
        title: Show title used for the fallback search.
        year: First air date year used for the fallback search.

    Returns:
        The TMDB id as a string, or None if neither lookup found a show.

    Raises:
        requests.RequestException: If the fallback title/year request fails.
            Failures of the IMDb-id lookup are logged and trigger the
            fallback instead.
    """
    # TMDB's find-by-external-id lookup matches IMDb ids in their "tt" form;
    # a digit-only id would never match, so add the prefix when it is missing.
    lookup_id = f"tt{imdb_id}" if str(imdb_id).isdigit() else imdb_id

    # Try external source (IMDb ID)
    try:
        res = tmdb_request(f"/find/{lookup_id}", {"external_source": "imdb_id"})
        if res.get("tv_results"):
            # TMDB delivers numeric ids; convert to match the declared return type.
            return str(res["tv_results"][0]["id"])
    except Exception as e:
        # The exception needs a %s placeholder; passing it as a bare extra
        # argument makes the logging module fail while formatting the record.
        logging.warning(
            "TMDB lookup with IMDB ID failed for SHOW, falling back to search: %s",
            e,
        )

    # Fallback to title/year search
    res = tmdb_request("/search/tv", {"query": title, "first_air_date_year": year})
    if res.get("results"):
        return str(res["results"][0]["id"])

    logging.error(f"Unable to find show for title [{title}] and year [{year}].")
    return None
def download_image(img_path: str) -> np.ndarray | None:
    """Fetch one TMDB image and decode it with OpenCV.

    Args:
        img_path: Image path appended to ``IMG_BASE``.

    Returns:
        The image decoded with ``cv2.IMREAD_COLOR``, or None if the download
        or the decoding failed (a warning is logged in both cases).
    """
    url = IMG_BASE + img_path
    try:
        response = requests.get(url, stream=True, timeout=10)
        response.raise_for_status()
        encoded = np.frombuffer(response.content, np.uint8)
        decoded = cv2.imdecode(encoded, cv2.IMREAD_COLOR)
    except Exception as e:
        logging.warning(f"Failed to download image [{url}]: {e}")
        return None

    # imdecode reports undecodable data by returning None instead of raising.
    if decoded is None:
        logging.warning(f"cv2.imdecode failed for downloaded image [{url}].")
        return None
    return decoded
def download_episode_images(
    tmdb_id: str, seasons: list[int] | None = None
) -> ReferenceShowImages:
    """Download the still images of every episode of a show, given a TMDB id.

    Args:
        tmdb_id: TMDB id of the show.
        seasons: Season numbers to include; None downloads all seasons.

    Returns:
        Reference images keyed by season number and episode number, both as
        strings. Stills that fail to download are left out, so an episode
        may map to an empty list.

    Raises:
        requests.RequestException: If one of the TMDB metadata requests fails.
    """
    show_details = tmdb_request(f"/tv/{tmdb_id}")

    # Keys are strings: ReferenceShowImages declares str keys and converts the
    # requested season/episode with str() before looking them up, so int keys
    # could never be found.
    season_episode_images: dict[str, dict[str, list[np.ndarray]]] = {}

    # Download images for seasons
    for season in show_details.get("seasons", []):
        season_number = season["season_number"]
        if seasons is not None and season_number not in seasons:
            continue

        logging.info(f"Fetching season [{season_number}] images.")
        episode_images: dict[str, list[np.ndarray]] = {}
        season_episode_images[str(season_number)] = episode_images
        season_details = tmdb_request(f"/tv/{tmdb_id}/season/{season_number}")

        # Download images for episodes
        for episode in season_details.get("episodes", []):
            ep_num = episode["episode_number"]

            # Fetch episode images
            images = tmdb_request(
                f"/tv/{tmdb_id}/season/{season_number}/episode/{ep_num}/images"
            )
            downloads = (
                download_image(still["file_path"])
                for still in images.get("stills", [])
            )
            # download_image returns None on failure; keep only decoded images.
            episode_images[str(ep_num)] = [
                image for image in downloads if image is not None
            ]

    return ReferenceShowImages(season_episode_images)
# if __name__ == "__main__":
# tv_id = search_show(IMDB_ID, TITLE, YEAR)
# if tv_id:
# print(f"Found TMDB TV ID: {tv_id}")
# download_episode_images(tv_id)
# else:
# print("Could not find show on TMDB.")

44
uv.lock
View file

@ -16,6 +16,15 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/78/b6/6307fbef88d9b5ee7421e68d78a9f162e0da4900bc5f5793f6d3d0e34fb8/annotated_types-0.7.0-py3-none-any.whl", hash = "sha256:1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53", size = 13643, upload-time = "2024-05-20T21:33:24.1Z" },
]
[[package]]
name = "certifi"
version = "2025.8.3"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/dc/67/960ebe6bf230a96cda2e0abcf73af550ec4f090005363542f0765df162e0/certifi-2025.8.3.tar.gz", hash = "sha256:e564105f78ded564e3ae7c923924435e1daa7463faeab5bb932bc53ffae63407", size = 162386, upload-time = "2025-08-03T03:07:47.08Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/e5/48/1549795ba7742c948d2ad169c1c8cdbae65bc450d6cd753d124b17c8cd32/certifi-2025.8.3-py3-none-any.whl", hash = "sha256:f6c12493cfb1b06ba2ff328595af9350c65d6644968e5d3a2ffd78699af217a5", size = 161216, upload-time = "2025-08-03T03:07:45.777Z" },
]
[[package]]
name = "charset-normalizer"
version = "3.4.3"
@ -195,6 +204,15 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/04/4b/29cac41a4d98d144bf5f6d33995617b185d14b22401f75ca86f384e87ff1/h11-0.16.0-py3-none-any.whl", hash = "sha256:63cf8bbe7522de3bf65932fda1d9c2772064ffb3dae62d55932da54b31cb6c86", size = 37515, upload-time = "2025-04-24T03:35:24.344Z" },
]
[[package]]
name = "idna"
version = "3.10"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/f1/70/7703c29685631f5a7590aa73f1f1d3fa9a380e654b86af429e0934a32f7d/idna-3.10.tar.gz", hash = "sha256:12f65c9b470abda6dc35cf8e63cc574b1c52b11df2c86030af0ac09b01b13ea9", size = 190490, upload-time = "2024-09-15T18:07:39.745Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/76/c6/c88e154df9c4e1a2a66ccf0005a88dfb2650c1dffb6f5ce603dfbd452ce3/idna-3.10-py3-none-any.whl", hash = "sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3", size = 70442, upload-time = "2024-09-15T18:07:37.964Z" },
]
[[package]]
name = "imageio"
version = "2.37.0"
@ -774,6 +792,7 @@ dependencies = [
{ name = "matplotlib" },
{ name = "numpy" },
{ name = "opencv-python" },
{ name = "requests" },
{ name = "scikit-image" },
]
@ -783,9 +802,25 @@ requires-dist = [
{ name = "matplotlib", specifier = ">=3.10.6" },
{ name = "numpy", specifier = ">=2.3.2" },
{ name = "opencv-python", specifier = ">=4.11.0.86" },
{ name = "requests", specifier = ">=2.32.5" },
{ name = "scikit-image", specifier = ">=0.25.2" },
]
[[package]]
name = "requests"
version = "2.32.5"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "certifi" },
{ name = "charset-normalizer" },
{ name = "idna" },
{ name = "urllib3" },
]
sdist = { url = "https://files.pythonhosted.org/packages/c9/74/b3ff8e6c8446842c3f5c837e9c3dfcfe2018ea6ecef224c710c85ef728f4/requests-2.32.5.tar.gz", hash = "sha256:dbba0bac56e100853db0ea71b82b4dfd5fe2bf6d3754a8893c3af500cec7d7cf", size = 134517, upload-time = "2025-08-18T20:46:02.573Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/1e/db/4254e3eabe8020b458f1a747140d32277ec7a271daf1d235b70dc0b4e6e3/requests-2.32.5-py3-none-any.whl", hash = "sha256:2462f94637a34fd532264295e186976db0f5d453d1cdd31473c85a6a161affb6", size = 64738, upload-time = "2025-08-18T20:46:00.542Z" },
]
[[package]]
name = "scikit-image"
version = "0.25.2"
@ -913,6 +948,15 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/17/69/cd203477f944c353c31bade965f880aa1061fd6bf05ded0726ca845b6ff7/typing_inspection-0.4.1-py3-none-any.whl", hash = "sha256:389055682238f53b04f7badcb49b989835495a96700ced5dab2d8feae4b26f51", size = 14552, upload-time = "2025-05-21T18:55:22.152Z" },
]
[[package]]
name = "urllib3"
version = "2.5.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/15/22/9ee70a2574a4f4599c47dd506532914ce044817c7752a79b6a51286319bc/urllib3-2.5.0.tar.gz", hash = "sha256:3fc47733c7e419d4bc3f6b3dc2b4f890bb743906a30d56ba4a5bfa4bbff92760", size = 393185, upload-time = "2025-06-18T14:07:41.644Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/a7/c2/fe1e52489ae3122415c51f387e221dd0773709bad6c6cdaa599e8a2c5185/urllib3-2.5.0-py3-none-any.whl", hash = "sha256:e6b01673c0fa6a13e374b50871808eb3bf7046c4b125b216f6bf1cc604cff0dc", size = 129795, upload-time = "2025-06-18T14:07:40.39Z" },
]
[[package]]
name = "urllib3-future"
version = "2.13.908"