Improved metadata implementation

This commit is contained in:
Maximilian Giller 2025-09-12 19:26:32 +02:00
parent 979cb981b9
commit d0edc94f95
2 changed files with 99 additions and 38 deletions

View file

@ -4,57 +4,36 @@ from structures import PathInfo, FileInfo, PathCategory, FileCategory
from pathlib import Path
import os
import re
import subprocess
import json
import math
def get_metadata(filepath: Path) -> dict:
    """Probe a media file with ffprobe and return its parsed JSON report.

    Args:
        filepath: Location of the file handed to ffprobe.

    Returns:
        The decoded JSON document ffprobe wrote to stdout. Both the
        container section (``-show_format``) and the per-stream section
        (``-show_streams``) are requested.

    Raises:
        json.JSONDecodeError: If stdout does not hold valid JSON.
    """
    probe = subprocess.run(
        [
            "ffprobe",
            "-v",
            "quiet",
            "-print_format",
            "json",
            "-show_format",
            "-show_streams",
            filepath,
        ],
        # Capture both pipes as text; only stdout is consumed below.
        capture_output=True,
        text=True,
    )
    return json.loads(probe.stdout)
def classify_show(info: PathInfo) -> PathInfo:
    """Count the ``.mkv`` files below a show path whose runtime fits an episode.

    The known episode runtimes are fetched via ``imdb.get_all_episodes`` and
    reduced to whole minutes. Every ``.mkv`` file found under ``info.path``
    is then compared against that set; a file counts as an episode when its
    runtime in minutes, rounded down or up, is one of the known runtimes.

    Args:
        info: The show being classified; ``info.path`` is walked recursively
            and ``info.imdb.imdb_id`` selects the episode list.

    Returns:
        The same ``info`` object that was passed in.
    """
    # Gather meta information for identifying episodes.
    # NOTE(review): ep.duration is presumably in seconds (it is divided by 60
    # and compared against minutes below) -- confirm against the imdb client.
    episode_durations: set[int] = set()
    for ep in imdb.get_all_episodes(info.imdb.imdb_id):
        if ep.duration:
            episode_durations.add(int(ep.duration / 60))

    logging.debug(episode_durations)

    # Go over all files
    count = 0
    for root, _dirs, files in os.walk(info.path):
        for filename in files:
            filepath = Path(os.path.join(root, filename))

            # Check the extension before building a FileInfo: construction
            # probes the file with ffprobe, which is wasted work (and a
            # potential failure) for subtitles, artwork and other non-videos.
            if filepath.suffix != ".mkv":
                continue

            file = FileInfo(filepath, info)
            if file.duration_in_seconds is None:
                continue

            minutes = file.duration_in_seconds / 60
            if (
                math.floor(minutes) in episode_durations
                or math.ceil(minutes) in episode_durations
            ):
                print(f"{filename} {minutes} EPISODE")
                count += 1
            else:
                print(f"{filename} {minutes}")

    logging.info(f"Identified [{count}] episodes.")
    return info

View file

@ -3,6 +3,37 @@ from enum import Enum
from pathlib import Path
from imdbinfo.models import MovieBriefInfo
from os import path
import subprocess
import json
@dataclass
class Resolution:
    """Pixel dimensions of a video frame.

    ``w`` and ``x`` are read-only aliases of ``width``; ``h`` and ``y`` are
    read-only aliases of ``height``.
    """

    width: int
    """Horizontal resolution."""

    height: int
    """Vertical resolution."""

    @property
    def w(self) -> int:
        """Horizontal resolution."""
        return self.width

    @property
    def h(self) -> int:
        """Vertical resolution."""
        return self.height

    # Coordinate-style names share the property objects defined above.
    x = w
    y = h
class FileCategory(Enum):
@ -47,7 +78,7 @@ class PathCategory(Enum):
@dataclass
class FileInfo:
original_path: Path
path: Path
"""Original Path to file, before any processing."""
parent_path: "PathInfo"
@ -65,16 +96,67 @@ class FileInfo:
episode_no: int | None = None
"""Episode number in case if category is EPISODE."""
duration_in_seconds: int | None = None
"""Duration of potential video file in seconds."""
video_stream: dict | None = None
"""Meta information about the first video stream found."""
@property
def video_bitrate(self) -> float | None:
"""Bitrate of video in bps (bits per second)."""
if self.video_stream is None:
return None
return self.video_stream["bit_rate"]
@property
def resolution(self) -> Resolution | None:
    """Frame size of the stored video stream, if there is one.

    Returns:
        A ``Resolution`` built from the stream's ``width`` and ``height``
        entries, or ``None`` when ``video_stream`` is ``None``.
    """
    stream = self.video_stream
    if stream is None:
        return None

    # Width is read before height, so a missing key surfaces in that order.
    width, height = int(stream["width"]), int(stream["height"])
    return Resolution(width=width, height=height)
def __post_init__(self):
"""Read basic metadata often referenced for processing."""
m = self.read_metadata()
if m["format"]["duration"]:
self.duration_in_seconds = int(m["format"]["duration"])
self.video_stream = next(
(s for s in m["streams"] if s["codec_type"] == "video"), None
)
@property
def new_path(self) -> Path:
"""New Path."""
return Path(
path.join(
self.parent_path.path,
f"{self.new_file_name}{self.original_path.suffix}",
f"{self.new_file_name}{self.path.suffix}",
)
)
def read_metadata(self) -> dict:
    """Reads metadata using ffprobe.

    Returns:
        The decoded JSON document ffprobe wrote to stdout for ``self.path``.
        Both the container section (``-show_format``) and the per-stream
        section (``-show_streams``) are requested.

    Raises:
        json.JSONDecodeError: If stdout does not hold valid JSON.
    """
    probe = subprocess.run(
        [
            "ffprobe",
            "-v",
            "quiet",
            "-print_format",
            "json",
            "-show_format",
            "-show_streams",
            self.path,
        ],
        # Capture both pipes as text; only stdout is consumed below.
        capture_output=True,
        text=True,
    )
    return json.loads(probe.stdout)
@dataclass
class PathInfo: