A bit more progress for unified episode identification

This commit is contained in:
Maximilian Giller 2025-09-19 11:22:39 +02:00
parent d21ceb6a71
commit 018421dfab

View file

@ -1,11 +1,15 @@
from concurrent.futures import ProcessPoolExecutor, as_completed
from dataclasses import dataclass, field
import math
import os
from pathlib import Path
import re
import numpy as np
import cv2
import logging
import imdbinfo as imdb
from models import PathInfo
from models import FileCategory, FileInfo, PathInfo
import tmdb
@ -242,54 +246,45 @@ def match_episodes_to_references(
def process_show(info: PathInfo) -> PathInfo:
# Gather meta information for identifying episodes
episode_durations: set[int] = set()
episode_durations: set[int] = (
set()
) # TODO: Store duration per specific episode for unique duration identification
episode_count = 0 # TODO: Per season
for ep in imdb.get_all_episodes(info.imdb.imdb_id):
episode_count += 1
if ep.duration:
episode_durations.add(int(ep.duration / 60))
logging.debug(episode_durations)
# Go over all files
count = 0
for root, dirs, files in os.walk(info.path):
for filename in files:
filepath = Path(os.path.join(root, filename))
file = FileInfo(filepath, info)
if file.path.suffix != ".mkv" or file.duration_in_seconds is None:
continue
if (
math.floor(file.duration_in_seconds / 60) in episode_durations
or math.ceil(file.duration_in_seconds / 60) in episode_durations
):
print(f"{filename} {file.duration_in_seconds / 60} EPISODE")
count += 1
else:
print(f"{filename} {file.duration_in_seconds / 60}")
logging.info(f"Identified [{count}] episodes.")
if info.episodes is None:
logging.error(
"Episodes could not be identified, no reference matching possible."
)
return info
logging.debug(f"Episode durations: {episode_durations}")
# Get reference images for episode identification
if info.tmdb_id is None:
logging.error("TMDB entry not identified, cannot find reference images.")
return info
# Match episodes to references
references = tmdb.download_episode_images(info.tmdb_id)
# Go over all files
for root, dirs, files in os.walk(info.path):
# TODO: Detect season in dirs
for filename in files:
filepath = Path(os.path.join(root, filename))
file = FileInfo(filepath, info)
info.files.append(file)
# Is file an episode?
if file.path.suffix != ".mkv" or file.duration_in_seconds is None:
continue
elif (
math.floor(file.duration_in_seconds / 60) not in episode_durations
and math.ceil(file.duration_in_seconds / 60) not in episode_durations
):
continue
# ==== Handle episode ====
file.category = FileCategory.EPISODE
# TODO: Match episode to references
matches = match_episodes_to_references(
[
str(f.path.absolute())
@ -298,7 +293,10 @@ def process_show(info: PathInfo) -> PathInfo:
],
references.flatten(),
)
logging.info(matches)
if info.episodes is None:
logging.error("Episodes could not be identified.")
return info
# Set new episode names
if not matches.perfect_match or matches.reference_by_episode is None: