Refactoring; implemented TMDB reference image download and the main entry point
parent fc546b2741
commit fd9652bdec
7 changed files with 260 additions and 3 deletions
pyproject.toml
@@ -9,5 +9,6 @@ dependencies = [
     "matplotlib>=3.10.6",
     "numpy>=2.3.2",
     "opencv-python>=4.11.0.86",
+    "requests>=2.32.5",
     "scikit-image>=0.25.2",
 ]
src/file_classifier.py
@@ -1,11 +1,14 @@
 import logging
 import imdbinfo as imdb
-from structures import PathInfo, FileInfo, PathCategory, FileCategory
+from models import PathInfo, FileInfo, PathCategory, FileCategory
 from pathlib import Path
 import os
 import re
 import math
+
+import tmdb
+
 
 def classify_show(info: PathInfo) -> PathInfo:
     # Gather meta information for identifying episodes
     episode_durations: set[int] = set()
@@ -77,8 +80,15 @@ def classify_files(path: str) -> PathInfo | None:
         return None
     logging.info(f"Found matching IMDB entry with id [{imdb_entry.imdb_id}].")
 
+    tmdb_id = tmdb.search_show(imdb_entry.id, imdb_entry.title, year)
+
     info = PathInfo(
-        path=p, title=title, year=year, imdb_id=imdb_entry.imdb_id, imdb=imdb_entry
+        path=p,
+        title=imdb_entry.title,
+        year=year,
+        imdb_id=imdb_entry.imdb_id,
+        imdb=imdb_entry,
+        tmdb_id=tmdb_id,
     )
 
     # Identify category
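Below is a minimal, hedged sketch of driving the updated classifier, assuming classify_files takes a path string as in the hunk header above; the directory name is a hypothetical example.

from file_classifier import classify_files

# Hypothetical pre-renamed rip directory.
info = classify_files("/media/rips/Some Show (2001)")
if info is not None:
    # tmdb_id is the new field populated via tmdb.search_show() in this commit.
    print(info.title, info.year, info.imdb_id, info.tmdb_id)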
src/main.py (new file, 66 additions)
@@ -0,0 +1,66 @@
+from pathlib import Path
+
+from file_classifier import classify_files
+from match_episodes import match_episodes_to_references
+import argparse
+import logging
+
+from models import PathCategory
+import tmdb
+
+
+def main(args: argparse.Namespace):
+    info = classify_files(args.input)
+
+    if info is None:
+        logging.error("Could not classify files.")
+        return
+
+    if info.category != PathCategory.SHOW:
+        logging.error(
+            f"Directory not recognized as SHOW, but as [{info.category}] instead. Only SHOW supported at the moment."
+        )
+        return
+
+    # ==== Process SHOW ====
+    if info.episodes is None:
+        logging.error(
+            "Episodes could not be identified, no reference matching possible."
+        )
+        return
+
+    if info.tmdb_id is None:
+        logging.error("TMDB entry not identified, cannot find reference images.")
+        return
+
+    # Match episodes to references
+    references = tmdb.download_episode_images(info.tmdb_id)
+    matches = match_episodes_to_references(
+        [str(f.path.absolute()) for f in info.episodes], references.flatten()
+    )
+
+    # Set new episode names
+    # TODO: Resolve matching results
+
+    # Rename files
+    # TODO: Rename files
+
+    logging.info(f"Finished processing [{info.path}].")
+
+
+def args_parser() -> argparse.Namespace:
+    parser = argparse.ArgumentParser(
+        description="Automatic renamer for ripped DVD and Blu-Ray files."
+    )
+    parser.add_argument(
+        "input",
+        type=Path,
+        help="Path to directory of a ripped movie or show, already renamed to be easily identifiable.",
+    )
+
+    return parser.parse_args()
+
+
+if __name__ == "__main__":
+    args = args_parser()
+    main(args)
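A hedged usage sketch for the new entry point, assuming it is run from the src/ directory so the sibling modules resolve; the input path is a made-up example.

# Roughly equivalent to: python main.py "/media/rips/Some Show (2001)"
import argparse
import logging
from pathlib import Path

from main import main

logging.basicConfig(level=logging.INFO)
main(argparse.Namespace(input=Path("/media/rips/Some Show (2001)")))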
src/models.py
@@ -6,6 +6,37 @@ from os import path
 import subprocess
 import json
 
+import numpy as np
+
+
+@dataclass
+class ReferenceShowImages:
+    images: dict[str, dict[str, list[np.ndarray]]]
+    """All reference images sorted by Season > Episode > Images."""
+
+    def get_season(self, season: str) -> dict[str, list[np.ndarray]]:
+        season = str(season)
+        if season not in self.images.keys():
+            raise ValueError(f"Season [{season}] not found in reference images.")
+        return self.images[season]
+
+    def get_episode(self, season: str, episode: str) -> list[np.ndarray]:
+        episodes = self.get_season(season)
+        episode = str(episode)
+        if episode not in episodes.keys():
+            raise ValueError(
+                f"Episode [{episode}] not found in reference images for season [{season}]."
+            )
+        return episodes[episode]
+
+    def flatten(self) -> dict[str, list[np.ndarray]]:
+        """Collapse seasons and episodes into a shared ID."""
+        return {
+            f"S[{season}]E[{episode}]": imgs
+            for season, eps in self.images.items()
+            for episode, imgs in eps.items()
+        }
+
 
 @dataclass
 class Resolution:
@@ -178,10 +209,14 @@ class PathInfo:
     imdb: MovieBriefInfo
     """IMDB info object referencing media."""
 
+    tmdb_id: str | None
+    """TMDB id, or None if not identified."""
+
     category: PathCategory = PathCategory.UNCLASSIFIED
     """Category of path media."""
 
-    is_bluray_quality: bool = False
+    episodes: list[FileInfo] | None = None
+    """List of episodes if SHOW and episodes identified. Otherwise None."""
 
     files: list[FileInfo] = field(default_factory=list)
     """List of all files in the path."""
src/tmdb.py (new file, 101 additions)
@@ -0,0 +1,101 @@
+import cv2
+import numpy as np
+import requests
+import logging
+
+from models import ReferenceShowImages
+
+# ==== CONFIGURATION ====
+TMDB_API_KEY = "b7006350eb3eeb4cf7d9cb6db44cdc0b"  # <-- Replace with your TMDB API key
+BASE_URL = "https://api.themoviedb.org/3"
+IMG_BASE = "https://image.tmdb.org/t/p/original"
+
+
+def tmdb_request(endpoint: str, params: dict = {}):
+    """Helper to query the TMDB API with authentication."""
+    params["api_key"] = TMDB_API_KEY
+    response = requests.get(f"{BASE_URL}{endpoint}", params=params)
+    response.raise_for_status()
+    return response.json()
+
+
+def search_show(imdb_id: str, title: str, year: int) -> str | None:
+    """Find the TMDB ID by IMDb ID first, falling back to title/year. Returns the TMDB id if successful, otherwise None."""
+    # Try external source (IMDb ID)
+    try:
+        res = tmdb_request("/find/" + imdb_id, {"external_source": "imdb_id"})
+        if res.get("tv_results"):
+            return res["tv_results"][0]["id"]
+    except Exception as e:
+        logging.warning(
+            f"TMDB lookup with IMDB ID failed for SHOW, falling back to search: {e}"
+        )
+
+    # Fallback to title/year search
+    res = tmdb_request("/search/tv", {"query": title, "first_air_date_year": year})
+    if res.get("results"):
+        return res["results"][0]["id"]
+
+    logging.error(f"Unable to find show for title [{title}] and year [{year}].")
+    return None
+
+
+def download_image(img_path: str) -> np.ndarray | None:
+    """Download a single image from TMDB and return it as a numpy array (BGR). On error, returns None."""
+    url = IMG_BASE + img_path
+    try:
+        r = requests.get(url, stream=True, timeout=10)
+        r.raise_for_status()
+        img_array = np.frombuffer(r.content, np.uint8)
+        img = cv2.imdecode(img_array, cv2.IMREAD_COLOR)
+        if img is None:
+            logging.warning(f"cv2.imdecode failed for downloaded image [{url}].")
+            return None
+        return img
+    except Exception as e:
+        logging.warning(f"Failed to download image [{url}]: {e}")
+        return None
+
+
+def download_episode_images(
+    tmdb_id: str, seasons: list[int] | None = None
+) -> ReferenceShowImages:
+    """Loop through all seasons and episodes of a given TMDB id, downloading images."""
+    show_details = tmdb_request(f"/tv/{tmdb_id}")
+
+    # Download images for seasons
+    season_episode_images = {}
+    for season in show_details.get("seasons", []):
+        season_number = season["season_number"]
+        if seasons is not None and season_number not in seasons:
+            continue
+
+        logging.info(f"Fetching season [{season_number}] images.")
+        season_episode_images[season_number] = {}
+        season_details = tmdb_request(f"/tv/{tmdb_id}/season/{season_number}")
+
+        # Download images for episodes
+        for episode in season_details.get("episodes", []):
+            ep_num = episode["episode_number"]
+            season_episode_images[season_number][ep_num] = []
+
+            # Fetch episode images
+            images = tmdb_request(
+                f"/tv/{tmdb_id}/season/{season_number}/episode/{ep_num}/images"
+            )
+
+            for idx, still in enumerate(images.get("stills", [])):
+                image = download_image(still["file_path"])
+                if image is not None:
+                    season_episode_images[season_number][ep_num].append(image)
+
+    return ReferenceShowImages(season_episode_images)
+
+
+# if __name__ == "__main__":
+#     tv_id = search_show(IMDB_ID, TITLE, YEAR)
+#     if tv_id:
+#         print(f"Found TMDB TV ID: {tv_id}")
+#         download_episode_images(tv_id)
+#     else:
+#         print("Could not find show on TMDB.")
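A hedged sketch of the intended download flow in the new module, assuming a valid TMDB API key is configured in tmdb.py; the IMDb ID, title, and year are illustrative examples.

import tmdb

# Resolve the TMDB id, trying the IMDb ID first and falling back to a title/year search.
tv_id = tmdb.search_show("tt0903747", "Breaking Bad", 2008)
if tv_id is not None:
    # Limit to season 1 to keep the number of image requests small.
    references = tmdb.download_episode_images(tv_id, seasons=[1])
    for episode_key, stills in references.flatten().items():
        print(episode_key, len(stills))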
uv.lock (44 additions)
@@ -16,6 +16,15 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/78/b6/6307fbef88d9b5ee7421e68d78a9f162e0da4900bc5f5793f6d3d0e34fb8/annotated_types-0.7.0-py3-none-any.whl", hash = "sha256:1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53", size = 13643, upload-time = "2024-05-20T21:33:24.1Z" },
 ]
 
+[[package]]
+name = "certifi"
+version = "2025.8.3"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/dc/67/960ebe6bf230a96cda2e0abcf73af550ec4f090005363542f0765df162e0/certifi-2025.8.3.tar.gz", hash = "sha256:e564105f78ded564e3ae7c923924435e1daa7463faeab5bb932bc53ffae63407", size = 162386, upload-time = "2025-08-03T03:07:47.08Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/e5/48/1549795ba7742c948d2ad169c1c8cdbae65bc450d6cd753d124b17c8cd32/certifi-2025.8.3-py3-none-any.whl", hash = "sha256:f6c12493cfb1b06ba2ff328595af9350c65d6644968e5d3a2ffd78699af217a5", size = 161216, upload-time = "2025-08-03T03:07:45.777Z" },
+]
+
 [[package]]
 name = "charset-normalizer"
 version = "3.4.3"
@@ -195,6 +204,15 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/04/4b/29cac41a4d98d144bf5f6d33995617b185d14b22401f75ca86f384e87ff1/h11-0.16.0-py3-none-any.whl", hash = "sha256:63cf8bbe7522de3bf65932fda1d9c2772064ffb3dae62d55932da54b31cb6c86", size = 37515, upload-time = "2025-04-24T03:35:24.344Z" },
 ]
 
+[[package]]
+name = "idna"
+version = "3.10"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/f1/70/7703c29685631f5a7590aa73f1f1d3fa9a380e654b86af429e0934a32f7d/idna-3.10.tar.gz", hash = "sha256:12f65c9b470abda6dc35cf8e63cc574b1c52b11df2c86030af0ac09b01b13ea9", size = 190490, upload-time = "2024-09-15T18:07:39.745Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/76/c6/c88e154df9c4e1a2a66ccf0005a88dfb2650c1dffb6f5ce603dfbd452ce3/idna-3.10-py3-none-any.whl", hash = "sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3", size = 70442, upload-time = "2024-09-15T18:07:37.964Z" },
+]
+
 [[package]]
 name = "imageio"
 version = "2.37.0"
@@ -774,6 +792,7 @@ dependencies = [
     { name = "matplotlib" },
     { name = "numpy" },
     { name = "opencv-python" },
+    { name = "requests" },
     { name = "scikit-image" },
 ]
 
@@ -783,9 +802,25 @@ requires-dist = [
     { name = "matplotlib", specifier = ">=3.10.6" },
     { name = "numpy", specifier = ">=2.3.2" },
     { name = "opencv-python", specifier = ">=4.11.0.86" },
+    { name = "requests", specifier = ">=2.32.5" },
     { name = "scikit-image", specifier = ">=0.25.2" },
 ]
 
+[[package]]
+name = "requests"
+version = "2.32.5"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "certifi" },
+    { name = "charset-normalizer" },
+    { name = "idna" },
+    { name = "urllib3" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/c9/74/b3ff8e6c8446842c3f5c837e9c3dfcfe2018ea6ecef224c710c85ef728f4/requests-2.32.5.tar.gz", hash = "sha256:dbba0bac56e100853db0ea71b82b4dfd5fe2bf6d3754a8893c3af500cec7d7cf", size = 134517, upload-time = "2025-08-18T20:46:02.573Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/1e/db/4254e3eabe8020b458f1a747140d32277ec7a271daf1d235b70dc0b4e6e3/requests-2.32.5-py3-none-any.whl", hash = "sha256:2462f94637a34fd532264295e186976db0f5d453d1cdd31473c85a6a161affb6", size = 64738, upload-time = "2025-08-18T20:46:00.542Z" },
+]
+
 [[package]]
 name = "scikit-image"
 version = "0.25.2"
@@ -913,6 +948,15 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/17/69/cd203477f944c353c31bade965f880aa1061fd6bf05ded0726ca845b6ff7/typing_inspection-0.4.1-py3-none-any.whl", hash = "sha256:389055682238f53b04f7badcb49b989835495a96700ced5dab2d8feae4b26f51", size = 14552, upload-time = "2025-05-21T18:55:22.152Z" },
 ]
 
+[[package]]
+name = "urllib3"
+version = "2.5.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/15/22/9ee70a2574a4f4599c47dd506532914ce044817c7752a79b6a51286319bc/urllib3-2.5.0.tar.gz", hash = "sha256:3fc47733c7e419d4bc3f6b3dc2b4f890bb743906a30d56ba4a5bfa4bbff92760", size = 393185, upload-time = "2025-06-18T14:07:41.644Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/a7/c2/fe1e52489ae3122415c51f387e221dd0773709bad6c6cdaa599e8a2c5185/urllib3-2.5.0-py3-none-any.whl", hash = "sha256:e6b01673c0fa6a13e374b50871808eb3bf7046c4b125b216f6bf1cc604cff0dc", size = 129795, upload-time = "2025-06-18T14:07:40.39Z" },
+]
+
 [[package]]
 name = "urllib3-future"
 version = "2.13.908"