"""Classify a media library directory using IMDB data and ffprobe metadata."""

import json
import logging
import math
import os
import re
import subprocess
from pathlib import Path

import imdbinfo as imdb

from structures import PathInfo, FileInfo, PathCategory, FileCategory

# Directory names are expected to look like "Some Title (2008)".
_DIR_NAME_PATTERN = re.compile(r"^(?P<title>.+) \((?P<year>[0-9]{4})\)$")


def get_metadata(filepath: Path) -> dict:
    """Return the ffprobe format/stream metadata of *filepath* as a dict.

    Runs ``ffprobe`` with JSON output and parses its stdout.

    Raises:
        FileNotFoundError: If the ``ffprobe`` executable cannot be started.
        json.JSONDecodeError: If ffprobe produced no parsable JSON output.
    """
    cmd = [
        "ffprobe",
        "-v",
        "quiet",
        "-print_format",
        "json",
        "-show_format",
        "-show_streams",
        str(filepath),
    ]
    result = subprocess.run(
        cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True
    )
    return json.loads(result.stdout)


def _normalize_title(title: str) -> str:
    """Strip colons and hyphens so titles can be compared loosely."""
    return title.replace(":", "").replace("-", "")


def classify_show(info: PathInfo) -> PathInfo:
    """Print which ``.mkv`` files under ``info.path`` look like episodes.

    A file counts as an episode when its duration in minutes, rounded either
    down or up, equals the duration of one of the episodes IMDB lists for the
    show. Results are only printed; *info* is returned unchanged.
    """
    # Prepare meta information.
    episode_durations: set[int] = set()
    if info.category == PathCategory.SHOW:
        for ep in imdb.get_all_episodes(info.imdb.imdb_id):
            if ep.duration:
                # NOTE(review): presumably ep.duration is in seconds — confirm
                # against the imdbinfo documentation.
                episode_durations.add(int(ep.duration / 60))
    print(episode_durations)

    # Go over all files.
    count = 0
    for root, _dirs, files in os.walk(info.path):
        for filename in files:
            filepath = Path(root) / filename
            if filepath.suffix != ".mkv":
                continue
            try:
                metadata = get_metadata(filepath)
                minutes = float(metadata["format"]["duration"]) / 60
            except (KeyError, TypeError, ValueError):
                # ffprobe could not report a duration (e.g. a corrupt file);
                # skip this file instead of aborting the whole walk.
                logging.warning(
                    f"Could not determine duration of [{filepath}]; skipping."
                )
                continue
            duration = int(minutes)
            # Compare the un-truncated value: flooring/ceiling an already
            # truncated int would make both checks identical.
            if (
                math.floor(minutes) in episode_durations
                or math.ceil(minutes) in episode_durations
            ):
                print(f"{filename} {duration} EPISODE")
                count += 1
            else:
                print(f"{filename} {duration}")
    print(count)

    return info


def classify_movie(info: PathInfo) -> PathInfo:
    """Placeholder for movie classification; logs an error and returns *info*."""
    logging.error("Movie classification not yet implemented.")
    return info


def classify_files(path: str) -> PathInfo | None:
    """Identify the title at *path* on IMDB and classify its files.

    The last path component must have the form ``"Title (YYYY)"``. The first
    IMDB search result has to match both title (ignoring ``:`` and ``-``) and
    year.

    Returns:
        The populated ``PathInfo``, or ``None`` when the directory name cannot
        be parsed or no matching IMDB entry is found.
    """
    p = Path(path)

    # Extract title and year.
    match = _DIR_NAME_PATTERN.match(p.name)
    if not match:
        logging.error(
            f"Could not extract title and year from directory name [{p.name}]."
        )
        return None
    title = match.group("title")
    year = int(match.group("year"))
    logging.info(f"Information extracted. Year: [{year}] Title: [{title}]")

    # Fetch from IMDB.
    results = imdb.search_title(p.name)
    # An empty result list would otherwise raise IndexError below.
    if results is None or not results.titles:
        logging.error(f"No IMDB results found for query [{p.name}].")
        return None
    imdb_entry = results.titles[0]
    if _normalize_title(imdb_entry.title) != _normalize_title(title):
        logging.error(
            f"IMDB result title does not match. Expected: [{title}] Actual: [{imdb_entry.title}]"
        )
        return None
    if imdb_entry.year != year:
        logging.error(
            f"IMDB result year does not match. Expected: [{year}] Actual: [{imdb_entry.year}]"
        )
        return None
    logging.info(f"Found matching IMDB entry with id [{imdb_entry.imdb_id}].")
    info = PathInfo(
        path=p, title=title, year=year, imdb_id=imdb_entry.imdb_id, imdb=imdb_entry
    )

    # Identify category.
    if imdb_entry.kind == "tvSeries":
        info.category = PathCategory.SHOW
        logging.info("Path identified as containing SHOW.")
        info = classify_show(info)
    elif imdb_entry.kind == "movie":
        info.category = PathCategory.MOVIE
        logging.info("Path identified as containing MOVIE.")
        info = classify_movie(info)
    else:
        info.category = PathCategory.UNKNOWN
        logging.error(
            f"IMDB entry has unknown qualifier for content [{imdb_entry.kind}]."
        )

    return info


if __name__ == "__main__":
    results = classify_files("/home/max/Media Library/testing/The Mentalist (2008)")
    print(results)
    print("Done.")