A bit more progress for unified episode identification

This commit is contained in:
Maximilian Giller 2025-09-19 11:22:39 +02:00
parent d21ceb6a71
commit 018421dfab

View file

@ -1,11 +1,15 @@
from concurrent.futures import ProcessPoolExecutor, as_completed from concurrent.futures import ProcessPoolExecutor, as_completed
from dataclasses import dataclass, field from dataclasses import dataclass, field
import math
import os
from pathlib import Path
import re import re
import numpy as np import numpy as np
import cv2 import cv2
import logging import logging
import imdbinfo as imdb
from models import PathInfo from models import FileCategory, FileInfo, PathInfo
import tmdb import tmdb
@ -242,63 +246,57 @@ def match_episodes_to_references(
def process_show(info: PathInfo) -> PathInfo: def process_show(info: PathInfo) -> PathInfo:
# Gather meta information for identifying episodes # Gather meta information for identifying episodes
episode_durations: set[int] = set() episode_durations: set[int] = (
set()
) # TODO: Store duration per specific episode for unique duration identification
episode_count = 0 # TODO: Per season
for ep in imdb.get_all_episodes(info.imdb.imdb_id): for ep in imdb.get_all_episodes(info.imdb.imdb_id):
episode_count += 1
if ep.duration: if ep.duration:
episode_durations.add(int(ep.duration / 60)) episode_durations.add(int(ep.duration / 60))
logging.debug(episode_durations) logging.debug(f"Episode durations: {episode_durations}")
# Go over all files
count = 0
for root, dirs, files in os.walk(info.path):
for filename in files:
filepath = Path(os.path.join(root, filename))
file = FileInfo(filepath, info)
if file.path.suffix != ".mkv" or file.duration_in_seconds is None:
continue
if (
math.floor(file.duration_in_seconds / 60) in episode_durations
or math.ceil(file.duration_in_seconds / 60) in episode_durations
):
print(f"{filename} {file.duration_in_seconds / 60} EPISODE")
count += 1
else:
print(f"{filename} {file.duration_in_seconds / 60}")
logging.info(f"Identified [{count}] episodes.")
if info.episodes is None:
logging.error(
"Episodes could not be identified, no reference matching possible."
)
return info
# Get reference images for episode identification
if info.tmdb_id is None: if info.tmdb_id is None:
logging.error("TMDB entry not identified, cannot find reference images.") logging.error("TMDB entry not identified, cannot find reference images.")
return info return info
# Match episodes to references
references = tmdb.download_episode_images(info.tmdb_id) references = tmdb.download_episode_images(info.tmdb_id)
matches = match_episodes_to_references(
[ # Go over all files
str(f.path.absolute()) for root, dirs, files in os.walk(info.path):
for f in info.episodes # TODO: Detect season in dirs
if f.episode is None or f.season is None
], for filename in files:
references.flatten(), filepath = Path(os.path.join(root, filename))
) file = FileInfo(filepath, info)
logging.info(matches) info.files.append(file)
# Is file an episode?
if file.path.suffix != ".mkv" or file.duration_in_seconds is None:
continue
elif (
math.floor(file.duration_in_seconds / 60) not in episode_durations
and math.ceil(file.duration_in_seconds / 60) not in episode_durations
):
continue
# ==== Handle episode ====
file.category = FileCategory.EPISODE
# TODO: Match episode to references
matches = match_episodes_to_references(
[
str(f.path.absolute())
for f in info.episodes
if f.episode is None or f.season is None
],
references.flatten(),
)
if info.episodes is None:
logging.error("Episodes could not be identified.")
return info
# Set new episode names # Set new episode names
if not matches.perfect_match or matches.reference_by_episode is None: if not matches.perfect_match or matches.reference_by_episode is None: