Implemented search functionality

This commit is contained in:
Maximilian Giller 2025-02-18 14:26:44 +01:00
parent d369ba16f4
commit 1e407d1ad2

View file

@ -13,8 +13,10 @@ class DcSearchResult:
"""Year of media parsed from label. Parsing has not been tested on all cases."""
self.title: str
"""Title of media parsed from label. Parsing has not been tested on all cases."""
self.tagline: str
"""Unprocessed tagline returned by search."""
self.format: str
"""Format (DVD, Blu-Ray, etc.) parsed from label. Parsing has not been tested on all cases."""
"""Format (DVD, Blu-Ray, etc.) parsed from label. Defaults to DVD if no explicit format is given. Parsing has not been tested on all cases."""
self.fid: int
"""FID, used to identify media by dvdcompare.net, parsed from URL. Can be used to retrive further details."""
self.url: str
@ -23,7 +25,7 @@ class DcSearchResult:
"""Description, listing available publications to label, as returned by search."""
def __repr__(self):
# Multi-line, human-readable dump of this search result, built from the
# instance's own attributes.
return f"{self.label}\n{self.description}\n"
# NOTE(review): as laid out here the return below can never run, because the
# return above always exits first. The two lines look like the removed and the
# added variant of the same statement from a diff rendered without +/- markers;
# confirm which one the file should keep. This longer variant additionally
# prints title, format, year, tagline, fid and url.
return f"{self.label}\n{self.title} | {self.format} | {self.year}\n{self.tagline}\n[{self.fid} | {self.url}]\n{self.description}\n"
class DvdCompare:
@ -33,10 +35,35 @@ class DvdCompare:
""" Can be adjusted, if the domain should ever be changed."""
def _reduce_spaces(text: str) -> str:
# Collapse every run of whitespace characters in `text` into one single space.
# Leading and trailing whitespace is collapsed as well, but not removed.
return re.sub(r"\s+", " ", text)
def _get_title(label: str) -> str:
# NOTE(review): the signature takes `label`, yet the line below reads `text`,
# which is not a parameter of this function. This looks like diff interleaving
# rendered without +/- markers: the `.strip()` expression is presumably the
# updated body of the whitespace-collapsing helper and this def line a removed
# one. Confirm against the real file before relying on this function.
return re.sub(r"\s+", " ", text).strip()
def _parse_result(item) -> DcSearchResult:
    """Build a DcSearchResult from a single search-result list item.

    `item` is one of the `<li>` elements the search code obtains via
    `find_all("li")`.

    Label, description, url and tagline are copied from the markup. Title,
    format and year are then parsed from the label on a best-effort basis.
    When the label does not follow the expected
    "Title (The) (Format) (Year)" layout, the whole label is used as the
    title, the format defaults to "DVD", and the year is taken from a
    trailing "(YYYY)" if there is one (otherwise 0). That way one unusual
    label no longer aborts the entire search with a TypeError.

    Raises:
        IndexError: if the link target contains no "?fid=" part.
        ValueError: if the text after "?fid=" is not a plain integer.
    """
    result = DcSearchResult()

    # Get direct values
    link = item.strong.a
    result.label = DvdCompare._reduce_spaces(link.text)
    # NOTE(review): relies on the fifth child node of the item holding the
    # rest of the description; confirm against the current page markup.
    result.description = DvdCompare._reduce_spaces(
        f"{item.find('i').text}{list(item.children)[4]}"
    )
    result.url = f"{DvdCompare.BASE_URL}/comparisons/{link.get('href')}"
    result.tagline = link.get("title")

    # Parse QoL values
    label_matches = re.match(
        r"^(?P<title>[^()]+)\s(?P<the>\(The\))?\s?(?:\((?P<format>[^()]+)\))?\s?\((?P<year>[0-9]{4})\)$",
        result.label,
        re.IGNORECASE,
    )

    if label_matches is None:
        # Unknown label layout: keep the result usable instead of crashing.
        trailing_year = re.search(r"\(([0-9]{4})\)$", result.label)
        result.title = result.label
        result.format = "DVD"
        result.year = int(trailing_year[1]) if trailing_year else 0
    else:
        # No explicit format in the label means a plain DVD release.
        result.format = label_matches["format"] or "DVD"
        result.year = int(label_matches["year"])
        result.title = label_matches["title"]

        # Labels carry the article as a suffix, e.g. "Title (The)"; move it
        # back to the front.
        if label_matches["the"]:
            result.title = f"The {result.title}"

    result.fid = int(result.url.split("?fid=")[1])

    return result
async def search_async(
title: str | None,
@ -93,15 +120,7 @@ class DvdCompare:
if result_parent is None:
return [] # Nothing found
all_results = []
for child in result_parent.find_all("li"):
next_result = DcSearchResult()
next_result.label = DvdCompare._reduce_spaces(child.strong.a.text)
next_result.description = DvdCompare._reduce_spaces(f"{child.find('i').text}{list(child.children)[4]}")
all_results.append(next_result)
return all_results
return [DvdCompare._parse_result(i) for i in result_parent.find_all("li")]
if __name__ == "__main__":