import logging
import imdbinfo as imdb
from structures import PathInfo, FileInfo, PathCategory, FileCategory
from pathlib import Path
import os
import re
import subprocess
import json
import math
def get_metadata(filepath: Path) -> dict:
    """Probe a media file with ``ffprobe`` and return its decoded JSON report.

    Runs ``ffprobe -v quiet -print_format json -show_format -show_streams``
    on *filepath* and parses whatever the process writes to stdout.

    Args:
        filepath: Location of the media file to inspect.

    Returns:
        The JSON document printed by ffprobe, decoded into Python objects.
        The requested sections are ``format`` and ``streams``; callers should
        not assume they are present when ffprobe could not read the file.

    Raises:
        json.JSONDecodeError: If ffprobe's stdout is not valid JSON. The
            message names the file and the process exit code.
        OSError: Propagated from ``subprocess.run`` when the ``ffprobe``
            executable cannot be started.
    """
    cmd = [
        "ffprobe",
        "-v",
        "quiet",
        "-print_format",
        "json",
        "-show_format",
        "-show_streams",
        # Pass a plain string so the argument list is homogeneous.
        str(filepath),
    ]
    completed = subprocess.run(
        cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True
    )

    # A failed probe is reported but not raised here: existing callers receive
    # whatever ffprobe printed, exactly as before.
    if completed.returncode != 0:
        logging.warning(
            "ffprobe exited with code %s for [%s].", completed.returncode, filepath
        )

    try:
        return json.loads(completed.stdout)
    except json.JSONDecodeError as err:
        # Same exception type as before, but with enough context to find the
        # offending file.
        raise json.JSONDecodeError(
            f"ffprobe (exit code {completed.returncode}) produced no parsable "
            f"metadata for [{filepath}]: {err.msg}",
            err.doc,
            err.pos,
        ) from err
def classify_show(info: PathInfo) -> PathInfo:
    """Report which ``.mkv`` files under a show directory look like episodes.

    Collects the episode lengths listed on IMDB for the show, then walks
    ``info.path`` and compares each ``.mkv`` file's runtime (as reported by
    ``get_metadata``) against those lengths. Results are printed to stdout:
    the set of known lengths, one line per inspected file (suffixed with
    ``EPISODE`` on a match), and finally the number of matches.

    Args:
        info: Description of the directory being classified. Episode lengths
            are only fetched when ``info.category`` is ``PathCategory.SHOW``.

    Returns:
        *info*, unmodified.
    """
    # Prepare meta information: known episode lengths in whole minutes.
    # NOTE(review): assumes ep.duration is expressed in seconds - confirm
    # against the imdbinfo API.
    episode_durations: set[int] = set()
    if info.category == PathCategory.SHOW:
        episode_durations = {
            int(ep.duration / 60)
            for ep in imdb.get_all_episodes(info.imdb.imdb_id)
            if ep.duration
        }
    print(episode_durations)

    # Go over all files
    count = 0
    for root, _dirs, files in os.walk(info.path):
        for filename in files:
            filepath = Path(root) / filename
            if filepath.suffix.lower() != ".mkv":
                continue

            try:
                minutes = float(get_metadata(filepath)["format"]["duration"]) / 60
            except (KeyError, TypeError, ValueError) as err:
                # One unreadable file should not abort the scan of the whole
                # directory tree.
                logging.warning(
                    "Could not determine duration of [%s]: %r", filepath, err
                )
                continue

            # Keep the fractional minutes for the comparison so that rounding
            # down and rounding up are really two different candidates;
            # truncating first made both checks identical.
            duration = int(minutes)
            if (
                math.floor(minutes) in episode_durations
                or math.ceil(minutes) in episode_durations
            ):
                print(f"{filename} {duration} EPISODE")
                count += 1
            else:
                print(f"{filename} {duration}")
    print(count)
    return info
def classify_movie(info: PathInfo) -> PathInfo:
    """Placeholder for movie classification.

    Logs an error saying the feature is missing and hands *info* back
    untouched.
    """
    not_implemented_msg = "Movie classification not yet implemented."
    logging.error(not_implemented_msg)
    return info
# Directory names are expected to look like "Some Title (2008)".
_TITLE_YEAR_PATTERN = re.compile(r"^(?P<title>.+) \((?P<year>[0-9]{4})\)$")


def _normalize_title(title: str) -> str:
    """Drop ':' and '-' so titles can be compared across naming styles."""
    return title.replace(":", "").replace("-", "")


def classify_files(path: str) -> PathInfo | None:
    """Identify the show or movie stored in a directory and classify it.

    The directory name must have the form ``<title> (<year>)``. The name is
    searched on IMDB and the first hit is accepted only if its title (ignoring
    ``:`` and ``-``) and year agree with the directory name. Depending on the
    hit's ``kind`` the directory is handed to ``classify_show`` or
    ``classify_movie``.

    Args:
        path: Directory to classify; only its final component is parsed.

    Returns:
        A ``PathInfo`` describing the directory, with ``category`` set to
        ``SHOW``, ``MOVIE`` or ``UNKNOWN``. ``None`` if the directory name
        cannot be parsed, IMDB returns no results, or the first result's
        title or year does not match.
    """
    directory = Path(path)

    # Extract title and year
    match = _TITLE_YEAR_PATTERN.match(directory.name)
    if not match:
        logging.error(
            "Could not extract title and year from directory name [%s].",
            directory.name,
        )
        return None
    title = match.group("title")
    year = int(match.group("year"))
    logging.info("Information extracted. Year: [%s] Title: [%s]", year, title)

    # Fetch from IMDB
    results = imdb.search_title(directory.name)
    # An empty hit list is treated like a missing result instead of failing
    # on the index access below.
    if results is None or not results.titles:
        logging.error("No IMDB results found for query [%s].", directory.name)
        return None

    imdb_entry = results.titles[0]
    if _normalize_title(imdb_entry.title) != _normalize_title(title):
        logging.error(
            "IMDB result title does not match. Expected: [%s] Actual: [%s]",
            title,
            imdb_entry.title,
        )
        return None
    if imdb_entry.year != year:
        logging.error(
            "IMDB result year does not match. Expected: [%s] Actual: [%s]",
            year,
            imdb_entry.year,
        )
        return None
    logging.info("Found matching IMDB entry with id [%s].", imdb_entry.imdb_id)

    info = PathInfo(
        path=directory,
        title=title,
        year=year,
        imdb_id=imdb_entry.imdb_id,
        imdb=imdb_entry,
    )

    # Identify category
    if imdb_entry.kind == "tvSeries":
        info.category = PathCategory.SHOW
        logging.info("Path identified as containing SHOW.")
        info = classify_show(info)
    elif imdb_entry.kind == "movie":
        info.category = PathCategory.MOVIE
        logging.info("Path identified as containing MOVIE.")
        info = classify_movie(info)
    else:
        info.category = PathCategory.UNKNOWN
        logging.error(
            "IMDB entry has unknown qualifier for content [%s].", imdb_entry.kind
        )
    return info
if __name__ == "__main__":
    import sys

    # Without a configured handler the INFO-level progress messages logged by
    # the classifiers are dropped (the root logger defaults to WARNING).
    logging.basicConfig(level=logging.INFO)

    # The first command-line argument selects the directory to classify; with
    # no argument the original hard-coded test directory is used.
    target = (
        sys.argv[1]
        if len(sys.argv) > 1
        else "/home/max/Media Library/testing/The Mentalist (2008)"
    )
    results = classify_files(target)
    print(results)
    print("Done.")