Refactoring; implement TMDB reference image download and the main entry point
This commit is contained in:
parent
fc546b2741
commit
fd9652bdec
7 changed files with 260 additions and 3 deletions
|
@ -9,5 +9,6 @@ dependencies = [
|
|||
"matplotlib>=3.10.6",
|
||||
"numpy>=2.3.2",
|
||||
"opencv-python>=4.11.0.86",
|
||||
"requests>=2.32.5",
|
||||
"scikit-image>=0.25.2",
|
||||
]
|
||||
|
|
|
@ -1,11 +1,14 @@
|
|||
import logging
|
||||
import imdbinfo as imdb
|
||||
from structures import PathInfo, FileInfo, PathCategory, FileCategory
|
||||
from models import PathInfo, FileInfo, PathCategory, FileCategory
|
||||
from pathlib import Path
|
||||
import os
|
||||
import re
|
||||
import math
|
||||
|
||||
import tmdb
|
||||
|
||||
|
||||
def classify_show(info: PathInfo) -> PathInfo:
|
||||
# Gather meta information for identifying episodes
|
||||
episode_durations: set[int] = set()
|
||||
|
@ -77,8 +80,15 @@ def classify_files(path: str) -> PathInfo | None:
|
|||
return None
|
||||
logging.info(f"Found matching IMDB entry with id [{imdb_entry.imdb_id}].")
|
||||
|
||||
tmdb_id = tmdb.search_show(imdb_entry.id, imdb_entry.title, year)
|
||||
|
||||
info = PathInfo(
|
||||
path=p, title=title, year=year, imdb_id=imdb_entry.imdb_id, imdb=imdb_entry
|
||||
path=p,
|
||||
title=imdb_entry.title,
|
||||
year=year,
|
||||
imdb_id=imdb_entry.imdb_id,
|
||||
imdb=imdb_entry,
|
||||
tmdb_id=tmdb_id,
|
||||
)
|
||||
|
||||
# Identify category
|
||||
|
|
66
src/main.py
Normal file
66
src/main.py
Normal file
|
@ -0,0 +1,66 @@
|
|||
from pathlib import Path
|
||||
|
||||
from file_classifier import classify_files
|
||||
from match_episodes import match_episodes_to_references
|
||||
import argparse
|
||||
import logging
|
||||
|
||||
from models import PathCategory
|
||||
import tmdb
|
||||
|
||||
|
||||
def main(args: argparse.Namespace):
    """Entry point: classify a ripped directory and match its episodes to TMDB stills.

    Logs an error and returns early whenever a required piece of
    classification data is missing; renaming steps are still TODO.
    """
    info = classify_files(args.input)
    if info is None:
        logging.error("Could not classify files.")
        return

    if info.category != PathCategory.SHOW:
        logging.error(
            f"Directory not recognized as SHOW, but as [{info.category}] instead. Only SHOW supported at the moment."
        )
        return

    # ==== Process SHOW ====
    if info.episodes is None:
        logging.error(
            "Episodes could not be identified, no reference matching possible."
        )
        return

    if info.tmdb_id is None:
        logging.error("TMDB entry not identified, cannot find reference images.")
        return

    # Match episodes to references
    reference_images = tmdb.download_episode_images(info.tmdb_id)
    episode_paths = [str(episode.path.absolute()) for episode in info.episodes]
    matches = match_episodes_to_references(episode_paths, reference_images.flatten())

    # Set new episode names
    # TODO: Resolve matching results

    # Rename files
    # TODO: Rename files

    logging.info(f"Finished processing [{info.path}].")
|
||||
|
||||
|
||||
def args_parser() -> argparse.Namespace:
    """Build the command-line interface and return the parsed arguments."""
    cli = argparse.ArgumentParser(
        description="Automatic renamer for ripped DVD and Blu-Ray files."
    )
    cli.add_argument(
        "input",
        type=Path,
        help="Path to directory of a ripped movie or show, already renamed to be easily identifiable.",
    )
    return cli.parse_args()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
args = args_parser()
|
||||
main(args)
|
|
@ -6,6 +6,37 @@ from os import path
|
|||
import subprocess
|
||||
import json
|
||||
|
||||
import numpy as np
|
||||
|
||||
|
||||
@dataclass
|
||||
class ReferenceShowImages:
|
||||
images: dict[str, dict[str, list[np.ndarray]]]
|
||||
"""All reference images sorted by Season > Episode > Images."""
|
||||
|
||||
def get_season(self, season: str) -> dict[str, list[np.ndarray]]:
|
||||
season = str(season)
|
||||
if season not in self.images.keys():
|
||||
raise ValueError(f"Season [{season}] not found in reference images.")
|
||||
return self.images[season]
|
||||
|
||||
def get_episode(self, season: str, episode: str) -> list[np.ndarray]:
|
||||
episodes = self.get_season(season)
|
||||
episode = str(episode)
|
||||
if episode not in episodes.keys():
|
||||
raise ValueError(
|
||||
f"Episode [{episode}] not found in reference images for season [{season}]."
|
||||
)
|
||||
return episodes[episode]
|
||||
|
||||
def flatten(self) -> dict[str, list[np.ndarray]]:
|
||||
"""Collapse Seasons and episodes into a shared ID."""
|
||||
return {
|
||||
f"S[{season}]E[{episode}]": imgs
|
||||
for season, eps in self.images.items()
|
||||
for episode, imgs in eps.items()
|
||||
}
|
||||
|
||||
|
||||
@dataclass
|
||||
class Resolution:
|
||||
|
@ -178,10 +209,14 @@ class PathInfo:
|
|||
imdb: MovieBriefInfo
|
||||
"""IMDB info object referencing media."""
|
||||
|
||||
tmdb_id: str | None
|
||||
"""TMDB id, or None if not identified."""
|
||||
|
||||
category: PathCategory = PathCategory.UNCLASSIFIED
|
||||
"""Category of path media."""
|
||||
|
||||
is_bluray_quality: bool = False
|
||||
episodes: list[FileInfo] | None = None
|
||||
"""List of episodes if SHOW and episodes identified. Otherwise None."""
|
||||
|
||||
files: list[FileInfo] = field(default_factory=list)
|
||||
"""List of all files in the path."""
|
101
src/tmdb.py
Normal file
101
src/tmdb.py
Normal file
|
@ -0,0 +1,101 @@
|
|||
import logging
import os

import cv2
import numpy as np
import requests

from models import ReferenceShowImages
|
||||
|
||||
# ==== CONFIGURATION ====
# SECURITY: an API key was previously hardcoded here. Prefer the
# TMDB_API_KEY environment variable; the inline value remains only as a
# fallback and should be rotated / removed before real deployment.
TMDB_API_KEY = os.environ.get("TMDB_API_KEY", "b7006350eb3eeb4cf7d9cb6db44cdc0b")
BASE_URL = "https://api.themoviedb.org/3"
IMG_BASE = "https://image.tmdb.org/t/p/original"
|
||||
|
||||
|
||||
def tmdb_request(endpoint: str, params: dict | None = None) -> dict:
    """Query the TMDB API with authentication and return the parsed JSON body.

    Args:
        endpoint: API path appended to BASE_URL, e.g. "/tv/123".
        params: Optional query parameters. The caller's dict is never mutated.

    Returns:
        The decoded JSON response.

    Raises:
        requests.HTTPError: If the response has an error status code.
    """
    # Copy into a fresh dict: fixes the mutable-default-argument pitfall
    # (`params: dict = {}` was shared across calls) and stops the API key
    # from being injected into the caller's dict.
    query = dict(params or {})
    query["api_key"] = TMDB_API_KEY
    response = requests.get(f"{BASE_URL}{endpoint}", params=query)
    response.raise_for_status()
    return response.json()
|
||||
|
||||
|
||||
def search_show(imdb_id: str, title: str, year: int) -> str | None:
    """Find a show's TMDB id, by IMDb id first, falling back to title/year search.

    Args:
        imdb_id: IMDb identifier (e.g. "tt0903747").
        title: Show title used for the fallback search.
        year: First-air year used to constrain the fallback search.

    Returns:
        The TMDB id if found, otherwise None. NOTE(review): TMDB reports ids
        as integers; callers should not rely on the exact type — confirm
        against how PathInfo.tmdb_id is consumed.
    """
    # Primary: resolve via the external-source (IMDb id) endpoint.
    try:
        res = tmdb_request(f"/find/{imdb_id}", {"external_source": "imdb_id"})
        if res.get("tv_results"):
            return res["tv_results"][0]["id"]
    except Exception as e:
        # Bug fix: the exception must be folded into the message via lazy
        # %-formatting. Passing `e` as a bare extra argument without a
        # placeholder dropped it and triggered a logging formatting error.
        logging.warning(
            "TMDB lookup with IMDB ID failed for SHOW, falling back to search: %s", e
        )

    # Fallback: free-text title search constrained by first-air year.
    res = tmdb_request("/search/tv", {"query": title, "first_air_date_year": year})
    if res.get("results"):
        return res["results"][0]["id"]

    logging.error(f"Unable to find show for title [{title}] and year [{year}].")
    return None
|
||||
|
||||
|
||||
def download_image(img_path: str) -> np.ndarray | None:
    """Download a single TMDB image and decode it as a BGR numpy array.

    Args:
        img_path: TMDB image path (e.g. "/abc123.jpg"), appended to IMG_BASE.

    Returns:
        The decoded image (BGR, as produced by cv2.imdecode), or None on any
        download or decode failure (best-effort: failures are logged, not raised).
    """
    url = IMG_BASE + img_path
    try:
        # `stream=True` was removed: the full body is consumed immediately
        # via r.content, so streaming provided no benefit.
        r = requests.get(url, timeout=10)
        r.raise_for_status()
        img_array = np.frombuffer(r.content, np.uint8)
        img = cv2.imdecode(img_array, cv2.IMREAD_COLOR)
        if img is None:
            logging.warning(f"cv2.imdecode failed for downloaded image [{url}].")
            return None
        return img
    except Exception as e:
        logging.warning(f"Failed to download image [{url}]: {e}")
        return None
|
||||
|
||||
|
||||
def download_episode_images(
    tmdb_id: str, seasons: list[int] | None = None
) -> ReferenceShowImages:
    """Download all available episode still images for a show, given its TMDB id.

    Args:
        tmdb_id: TMDB id of the show.
        seasons: Optional whitelist of season numbers; None downloads all seasons.

    Returns:
        ReferenceShowImages keyed Season > Episode > list of images.
    """
    show_details = tmdb_request(f"/tv/{tmdb_id}")

    # Keys are stored as *strings* so that lookups through
    # ReferenceShowImages.get_season/get_episode (which normalize with str())
    # succeed; previously int keys made those accessors always fail.
    # flatten()'s "S[x]E[y]" ids render identically either way.
    season_episode_images: dict[str, dict[str, list]] = {}
    for season in show_details.get("seasons", []):
        season_number = season["season_number"]
        if seasons is not None and season_number not in seasons:
            continue

        logging.info(f"Fetching season [{season_number}] images.")
        season_key = str(season_number)
        season_episode_images[season_key] = {}
        season_details = tmdb_request(f"/tv/{tmdb_id}/season/{season_number}")

        # Download images for each episode of the season.
        for episode in season_details.get("episodes", []):
            ep_num = episode["episode_number"]
            episode_images: list = []
            season_episode_images[season_key][str(ep_num)] = episode_images

            # Fetch the episode's still-image manifest.
            images = tmdb_request(
                f"/tv/{tmdb_id}/season/{season_number}/episode/{ep_num}/images"
            )

            # Unused `enumerate` index removed; failed downloads are skipped.
            for still in images.get("stills", []):
                image = download_image(still["file_path"])
                if image is not None:
                    episode_images.append(image)

    return ReferenceShowImages(season_episode_images)
|
||||
|
||||
|
||||
# if __name__ == "__main__":
|
||||
# tv_id = search_show(IMDB_ID, TITLE, YEAR)
|
||||
# if tv_id:
|
||||
# print(f"Found TMDB TV ID: {tv_id}")
|
||||
# download_episode_images(tv_id)
|
||||
# else:
|
||||
# print("Could not find show on TMDB.")
|
44
uv.lock
44
uv.lock
|
@ -16,6 +16,15 @@ wheels = [
|
|||
{ url = "https://files.pythonhosted.org/packages/78/b6/6307fbef88d9b5ee7421e68d78a9f162e0da4900bc5f5793f6d3d0e34fb8/annotated_types-0.7.0-py3-none-any.whl", hash = "sha256:1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53", size = 13643, upload-time = "2024-05-20T21:33:24.1Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "certifi"
|
||||
version = "2025.8.3"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/dc/67/960ebe6bf230a96cda2e0abcf73af550ec4f090005363542f0765df162e0/certifi-2025.8.3.tar.gz", hash = "sha256:e564105f78ded564e3ae7c923924435e1daa7463faeab5bb932bc53ffae63407", size = 162386, upload-time = "2025-08-03T03:07:47.08Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/e5/48/1549795ba7742c948d2ad169c1c8cdbae65bc450d6cd753d124b17c8cd32/certifi-2025.8.3-py3-none-any.whl", hash = "sha256:f6c12493cfb1b06ba2ff328595af9350c65d6644968e5d3a2ffd78699af217a5", size = 161216, upload-time = "2025-08-03T03:07:45.777Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "charset-normalizer"
|
||||
version = "3.4.3"
|
||||
|
@ -195,6 +204,15 @@ wheels = [
|
|||
{ url = "https://files.pythonhosted.org/packages/04/4b/29cac41a4d98d144bf5f6d33995617b185d14b22401f75ca86f384e87ff1/h11-0.16.0-py3-none-any.whl", hash = "sha256:63cf8bbe7522de3bf65932fda1d9c2772064ffb3dae62d55932da54b31cb6c86", size = 37515, upload-time = "2025-04-24T03:35:24.344Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "idna"
|
||||
version = "3.10"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/f1/70/7703c29685631f5a7590aa73f1f1d3fa9a380e654b86af429e0934a32f7d/idna-3.10.tar.gz", hash = "sha256:12f65c9b470abda6dc35cf8e63cc574b1c52b11df2c86030af0ac09b01b13ea9", size = 190490, upload-time = "2024-09-15T18:07:39.745Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/76/c6/c88e154df9c4e1a2a66ccf0005a88dfb2650c1dffb6f5ce603dfbd452ce3/idna-3.10-py3-none-any.whl", hash = "sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3", size = 70442, upload-time = "2024-09-15T18:07:37.964Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "imageio"
|
||||
version = "2.37.0"
|
||||
|
@ -774,6 +792,7 @@ dependencies = [
|
|||
{ name = "matplotlib" },
|
||||
{ name = "numpy" },
|
||||
{ name = "opencv-python" },
|
||||
{ name = "requests" },
|
||||
{ name = "scikit-image" },
|
||||
]
|
||||
|
||||
|
@ -783,9 +802,25 @@ requires-dist = [
|
|||
{ name = "matplotlib", specifier = ">=3.10.6" },
|
||||
{ name = "numpy", specifier = ">=2.3.2" },
|
||||
{ name = "opencv-python", specifier = ">=4.11.0.86" },
|
||||
{ name = "requests", specifier = ">=2.32.5" },
|
||||
{ name = "scikit-image", specifier = ">=0.25.2" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "requests"
|
||||
version = "2.32.5"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "certifi" },
|
||||
{ name = "charset-normalizer" },
|
||||
{ name = "idna" },
|
||||
{ name = "urllib3" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/c9/74/b3ff8e6c8446842c3f5c837e9c3dfcfe2018ea6ecef224c710c85ef728f4/requests-2.32.5.tar.gz", hash = "sha256:dbba0bac56e100853db0ea71b82b4dfd5fe2bf6d3754a8893c3af500cec7d7cf", size = 134517, upload-time = "2025-08-18T20:46:02.573Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/1e/db/4254e3eabe8020b458f1a747140d32277ec7a271daf1d235b70dc0b4e6e3/requests-2.32.5-py3-none-any.whl", hash = "sha256:2462f94637a34fd532264295e186976db0f5d453d1cdd31473c85a6a161affb6", size = 64738, upload-time = "2025-08-18T20:46:00.542Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "scikit-image"
|
||||
version = "0.25.2"
|
||||
|
@ -913,6 +948,15 @@ wheels = [
|
|||
{ url = "https://files.pythonhosted.org/packages/17/69/cd203477f944c353c31bade965f880aa1061fd6bf05ded0726ca845b6ff7/typing_inspection-0.4.1-py3-none-any.whl", hash = "sha256:389055682238f53b04f7badcb49b989835495a96700ced5dab2d8feae4b26f51", size = 14552, upload-time = "2025-05-21T18:55:22.152Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "urllib3"
|
||||
version = "2.5.0"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/15/22/9ee70a2574a4f4599c47dd506532914ce044817c7752a79b6a51286319bc/urllib3-2.5.0.tar.gz", hash = "sha256:3fc47733c7e419d4bc3f6b3dc2b4f890bb743906a30d56ba4a5bfa4bbff92760", size = 393185, upload-time = "2025-06-18T14:07:41.644Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/a7/c2/fe1e52489ae3122415c51f387e221dd0773709bad6c6cdaa599e8a2c5185/urllib3-2.5.0-py3-none-any.whl", hash = "sha256:e6b01673c0fa6a13e374b50871808eb3bf7046c4b125b216f6bf1cc604cff0dc", size = 129795, upload-time = "2025-06-18T14:07:40.39Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "urllib3-future"
|
||||
version = "2.13.908"
|
||||
|
|
Loading…
Reference in a new issue