2025-09-11 04:59:50 +02:00
|
|
|
import logging
|
|
|
|
import imdbinfo as imdb
|
|
|
|
from structures import PathInfo, FileInfo, PathCategory, FileCategory
|
2025-09-08 08:03:25 +02:00
|
|
|
from pathlib import Path
|
2025-09-11 01:36:24 +02:00
|
|
|
import os
|
2025-09-11 04:59:50 +02:00
|
|
|
import re
|
|
|
|
import subprocess
|
|
|
|
import json
|
|
|
|
import math
|
2025-09-07 18:29:38 +02:00
|
|
|
|
|
|
|
|
2025-09-11 04:59:50 +02:00
|
|
|
def get_metadata(filepath: Path) -> dict:
    """Probe a media file with ``ffprobe`` and return the parsed JSON report.

    ``ffprobe`` is invoked quietly and asked to print both the container
    (``-show_format``) and per-stream (``-show_streams``) sections as JSON.

    Args:
        filepath: Location of the media file handed to ``ffprobe``.

    Returns:
        Whatever JSON document ``ffprobe`` wrote to stdout, decoded with
        ``json.loads``.

    Raises:
        json.JSONDecodeError: If stdout does not contain valid JSON.
    """
    verbosity_and_format = ("-v", "quiet", "-print_format", "json")
    requested_sections = ("-show_format", "-show_streams")
    # The exit status is not inspected; only the captured stdout is used.
    probe = subprocess.run(
        ["ffprobe", *verbosity_and_format, *requested_sections, filepath],
        capture_output=True,
        text=True,
    )
    return json.loads(probe.stdout)
|
2025-09-11 01:36:24 +02:00
|
|
|
|
2025-09-11 04:59:50 +02:00
|
|
|
|
2025-09-12 18:51:26 +02:00
|
|
|
def classify_show(info: PathInfo) -> PathInfo:
    """Report which ``.mkv`` files below ``info.path`` look like episodes.

    When ``info.category`` is ``PathCategory.SHOW`` the episode list is
    fetched from IMDB and every non-empty episode duration is converted to
    whole minutes. Each ``.mkv`` file found while walking ``info.path`` is
    probed with ``get_metadata``; it counts as an episode when its runtime,
    rounded either down or up to a whole minute, matches one of those IMDB
    runtimes.

    The set of IMDB runtimes, one line per ``.mkv`` file and the final
    number of matches are printed to stdout.

    Args:
        info: Path description; ``path``, ``category`` and ``imdb.imdb_id``
            are read.

    Returns:
        The ``info`` object that was passed in; this function does not
        modify it.
    """
    # Prepare meta information
    # NOTE(review): dividing by 60 assumes ep.duration is in seconds —
    # confirm against the imdbinfo documentation.
    episode_minutes: set[int] = set()
    if info.category == PathCategory.SHOW:
        episode_minutes = {
            int(ep.duration / 60)
            for ep in imdb.get_all_episodes(info.imdb.imdb_id)
            if ep.duration
        }

    print(episode_minutes)

    # Go over all files
    count = 0
    for root, _dirs, files in os.walk(info.path):
        for filename in files:
            filepath = Path(root) / filename
            if filepath.suffix != ".mkv":
                continue

            try:
                minutes = float(get_metadata(filepath)["format"]["duration"]) / 60
            except (KeyError, TypeError, ValueError):
                # A file ffprobe cannot describe should not abort the scan
                # of the remaining files.
                logging.warning(
                    f"Could not determine duration of [{filepath}]; skipping."
                )
                continue

            # Keep the fractional minutes for the comparison: truncating
            # first would make floor and ceil identical and remove the
            # one-minute tolerance.
            duration = int(minutes)
            if (
                math.floor(minutes) in episode_minutes
                or math.ceil(minutes) in episode_minutes
            ):
                print(f"{filename} {duration} EPISODE")
                count += 1
            else:
                print(f"{filename} {duration}")

    print(count)
    return info
|
|
|
|
|
|
|
|
|
|
|
|
def classify_movie(info: PathInfo) -> PathInfo:
    """Placeholder for movie classification.

    Logs an error stating that the feature is not implemented and hands the
    argument back unchanged.

    Args:
        info: Path description of the movie directory.

    Returns:
        The ``info`` object that was passed in.
    """
    not_implemented_message = "Movie classification not yet implemented."
    logging.error(not_implemented_message)
    return info
|
|
|
|
|
|
|
|
|
2025-09-11 04:59:50 +02:00
|
|
|
def classify_files(path: str) -> PathInfo | None:
    """Identify the title stored in a directory and classify its contents.

    The last path component must look like ``Title (YYYY)``. That name is
    searched on IMDB and the first hit is accepted only when its title
    (ignoring ``:`` and ``-``) and its year match the directory name.
    Depending on the hit's ``kind`` the directory is then passed to
    ``classify_show`` or ``classify_movie``.

    Args:
        path: Directory whose name follows the ``Title (YYYY)`` pattern.

    Returns:
        A populated ``PathInfo``, or ``None`` when the directory name cannot
        be parsed, IMDB yields no result, or the first result does not match
        the parsed title and year. Each ``None`` case is logged as an error.
    """
    p = Path(path)

    # Extract title and year
    pattern = re.compile(r"^(?P<title>.+) \((?P<year>[0-9]{4})\)$")
    match = pattern.match(p.name)
    if not match:
        logging.error(
            f"Could not extract title and year from directory name [{p.name}]."
        )
        return None
    title = match.group("title")
    year = int(match.group("year"))
    logging.info(f"Information extracted. Year: [{year}] Title: [{title}]")

    # Fetch from IMDB
    results = imdb.search_title(p.name)
    # A search can succeed yet contain no titles; indexing [0] below would
    # then raise IndexError, so treat it like a missing result.
    if results is None or not results.titles:
        logging.error(f"No IMDB results found for query [{p.name}].")
        return None
    imdb_entry = results.titles[0]

    def _strip_separators(text: str) -> str:
        # Colons and dashes are dropped on both sides before comparing.
        return text.replace(":", "").replace("-", "")

    if _strip_separators(imdb_entry.title) != _strip_separators(title):
        logging.error(
            f"IMDB result title does not match. Expected: [{title}] Actual: [{imdb_entry.title}]"
        )
        return None
    if imdb_entry.year != year:
        logging.error(
            f"IMDB result year does not match. Expected: [{year}] Actual: [{imdb_entry.year}]"
        )
        return None
    logging.info(f"Found matching IMDB entry with id [{imdb_entry.imdb_id}].")

    info = PathInfo(
        path=p, title=title, year=year, imdb_id=imdb_entry.imdb_id, imdb=imdb_entry
    )

    # Identify category
    if imdb_entry.kind == "tvSeries":
        info.category = PathCategory.SHOW
        logging.info("Path identified as containing SHOW.")
        info = classify_show(info)
    elif imdb_entry.kind == "movie":
        info.category = PathCategory.MOVIE
        logging.info("Path identified as containing MOVIE.")
        info = classify_movie(info)
    else:
        info.category = PathCategory.UNKNOWN
        logging.error(
            f"IMDB entry has unknown qualifier for content [{imdb_entry.kind}]."
        )

    return info
|
2025-09-07 18:29:38 +02:00
|
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Manual run against a fixed sample directory; prints the classification
    # result followed by a completion marker.
    sample_directory = "/home/max/Media Library/testing/The Mentalist (2008)"
    results = classify_files(sample_directory)

    print(results)
    print("Done.")
|