Improved metadata implementation

This commit is contained in:
Maximilian Giller 2025-09-12 19:26:32 +02:00
parent 979cb981b9
commit d0edc94f95
2 changed files with 99 additions and 38 deletions

View file

@ -4,57 +4,36 @@ from structures import PathInfo, FileInfo, PathCategory, FileCategory
from pathlib import Path from pathlib import Path
import os import os
import re import re
import subprocess
import json
import math import math
def get_metadata(filepath: Path) -> dict:
    """Probe ``filepath`` with ffprobe and return the parsed JSON report.

    ffprobe is invoked quietly and asked for both the container section
    (``-show_format``) and the per-stream section (``-show_streams``).

    Args:
        filepath: File handed to ffprobe as its input argument.

    Returns:
        Whatever JSON document ffprobe printed on stdout, decoded.
    """
    probe = subprocess.run(
        [
            "ffprobe",
            "-v",
            "quiet",
            "-print_format",
            "json",
            "-show_format",
            "-show_streams",
            filepath,
        ],
        # Same as piping stdout and stderr individually.
        capture_output=True,
        text=True,
    )
    return json.loads(probe.stdout)
def classify_show(info: PathInfo) -> PathInfo:
    """Count the ``.mkv`` files below ``info.path`` that look like episodes.

    A file counts as an episode when its runtime in minutes, rounded down
    or up, equals the runtime of any episode listed for ``info.imdb.imdb_id``.

    Args:
        info: Show directory to scan, including its IMDb reference.

    Returns:
        The same ``info`` object that was passed in.
    """
    # Gather meta information for identifying episodes.
    # NOTE(review): ep.duration is divided by 60, so it is presumably given
    # in seconds - confirm against the imdb helper.
    episode_durations: set[int] = {
        int(ep.duration / 60)
        for ep in imdb.get_all_episodes(info.imdb.imdb_id)
        if ep.duration
    }

    logging.debug(episode_durations)

    # Go over all files
    count = 0
    for root, _dirs, files in os.walk(info.path):
        for filename in files:
            filepath = Path(os.path.join(root, filename))

            # Check the suffix *before* building FileInfo: constructing a
            # FileInfo runs ffprobe on the file, which is wasted work for
            # anything that is not an .mkv.
            if filepath.suffix != ".mkv":
                continue

            file = FileInfo(filepath, info)
            if file.duration_in_seconds is None:
                continue

            minutes = file.duration_in_seconds / 60
            is_episode = (
                math.floor(minutes) in episode_durations
                or math.ceil(minutes) in episode_durations
            )
            if is_episode:
                print(f"{filename} {minutes} EPISODE")
                count += 1
            else:
                print(f"{filename} {minutes}")

    logging.info(f"Identified [{count}] episodes.")
    return info

View file

@ -3,6 +3,37 @@ from enum import Enum
from pathlib import Path from pathlib import Path
from imdbinfo.models import MovieBriefInfo from imdbinfo.models import MovieBriefInfo
from os import path from os import path
import subprocess
import json
@dataclass
class Resolution:
    """Width and height of a video picture, with short alias accessors.

    ``w``/``x`` read ``width`` and ``h``/``y`` read ``height``; all four
    are read-only views of the two stored fields.
    """

    width: int
    """Horizontal resolution."""

    height: int
    """Vertical resolution."""

    # Aliases of ``width``.

    @property
    def w(self) -> int:
        """Shorthand for :attr:`width`."""
        return self.width

    @property
    def x(self) -> int:
        """Axis-style name for :attr:`width`."""
        return self.width

    # Aliases of ``height``.

    @property
    def h(self) -> int:
        """Shorthand for :attr:`height`."""
        return self.height

    @property
    def y(self) -> int:
        """Axis-style name for :attr:`height`."""
        return self.height
class FileCategory(Enum): class FileCategory(Enum):
@ -47,7 +78,7 @@ class PathCategory(Enum):
@dataclass @dataclass
class FileInfo: class FileInfo:
original_path: Path path: Path
"""Original Path to file, before any processing.""" """Original Path to file, before any processing."""
parent_path: "PathInfo" parent_path: "PathInfo"
@ -65,16 +96,67 @@ class FileInfo:
episode_no: int | None = None episode_no: int | None = None
"""Episode number in case if category is EPISODE.""" """Episode number in case if category is EPISODE."""
duration_in_seconds: int | None = None
"""Duration of potential video file in seconds."""
video_stream: dict | None = None
"""Meta information about the first video stream found."""
@property
def video_bitrate(self) -> float | None:
"""Bitrate of video in bps (bits per second)."""
if self.video_stream is None:
return None
return self.video_stream["bit_rate"]
@property
def resolution(self) -> Resolution | None:
    """Picture size of the stored video stream, if there is one.

    Returns:
        A ``Resolution`` built from the stream's ``width`` and ``height``
        entries, or ``None`` when no video stream was recorded.
    """
    stream = self.video_stream
    if stream is not None:
        return Resolution(width=int(stream["width"]), height=int(stream["height"]))
    return None
def __post_init__(self):
"""Read basic metadata often referenced for processing."""
m = self.read_metadata()
if m["format"]["duration"]:
self.duration_in_seconds = int(m["format"]["duration"])
self.video_stream = next(
(s for s in m["streams"] if s["codec_type"] == "video"), None
)
@property
def new_path(self) -> Path:
    """Target location of the file after renaming.

    Joins the parent's directory with ``new_file_name`` and re-attaches
    the suffix of the current ``path``.
    """
    directory = self.parent_path.path
    file_name = f"{self.new_file_name}{self.path.suffix}"
    return Path(path.join(directory, file_name))
def read_metadata(self) -> dict:
    """Run ffprobe on ``self.path`` and return its JSON report as a dict.

    The report is requested with the container (``-show_format``) and
    per-stream (``-show_streams``) sections; ffprobe's own logging is
    silenced with ``-v quiet``.
    """
    ffprobe_args = [
        "ffprobe",
        "-v",
        "quiet",
        "-print_format",
        "json",
        "-show_format",
        "-show_streams",
        self.path,
    ]
    # capture_output=True pipes both stdout and stderr.
    completed = subprocess.run(ffprobe_args, capture_output=True, text=True)
    return json.loads(completed.stdout)
@dataclass @dataclass
class PathInfo: class PathInfo: