Implemented search functionality

This commit is contained in:
Maximilian Giller 2025-02-18 14:26:44 +01:00
parent d369ba16f4
commit 1e407d1ad2

View file

@ -13,8 +13,10 @@ class DcSearchResult:
"""Year of media parsed from label. Parsing has not been tested on all cases.""" """Year of media parsed from label. Parsing has not been tested on all cases."""
self.title: str self.title: str
"""Title of media parsed from label. Parsing has not been tested on all cases.""" """Title of media parsed from label. Parsing has not been tested on all cases."""
self.tagline: str
"""Unprocessed tagline returned by search."""
self.format: str self.format: str
"""Format (DVD, Blu-Ray, etc.) parsed from label. Parsing has not been tested on all cases.""" """Format (DVD, Blu-Ray, etc.) parsed from label. Defaults to DVD if no explicit format is given. Parsing has not been tested on all cases."""
self.fid: int self.fid: int
"""FID, used to identify media by dvdcompare.net, parsed from URL. Can be used to retrieve further details.""" """FID, used to identify media by dvdcompare.net, parsed from URL. Can be used to retrieve further details."""
self.url: str self.url: str
@ -23,7 +25,7 @@ class DcSearchResult:
"""Description, listing available publications to label, as returned by search.""" """Description, listing available publications to label, as returned by search."""
def __repr__(self): def __repr__(self):
return f"{self.label}\n{self.description}\n" return f"{self.label}\n{self.title} | {self.format} | {self.year}\n{self.tagline}\n[{self.fid} | {self.url}]\n{self.description}\n"
class DvdCompare: class DvdCompare:
@ -33,10 +35,35 @@ class DvdCompare:
""" Can be adjusted, if the domain should ever be changed.""" """ Can be adjusted, if the domain should ever be changed."""
def _reduce_spaces(text: str) -> str: def _reduce_spaces(text: str) -> str:
return re.sub(r"\s+", " ", text) return re.sub(r"\s+", " ", text).strip()
@staticmethod
def _parse_result(item) -> DcSearchResult:
    """Build a DcSearchResult from one ``li`` element of the search result list.

    Label, description, URL and tagline are copied from the markup. Title,
    format and year are then parsed from the label on a best-effort basis.
    When the label does not follow the expected
    ``Title [(The)] [(Format)] (Year)`` layout, the whole label is used as
    the title, the format defaults to "DVD" and the year is set to 0, so a
    single unusual label no longer aborts the whole search.

    Args:
        item: Parsed HTML element for one search hit (presumably a
            BeautifulSoup tag; confirm against the caller).

    Returns:
        The populated search result.

    Raises:
        IndexError: If the result URL contains no ``?fid=`` part.
        ValueError: If the text following ``?fid=`` is not an integer.
    """
    result = DcSearchResult()

    # Get direct values
    link = item.strong.a
    result.label = DvdCompare._reduce_spaces(link.text)
    result.description = DvdCompare._reduce_spaces(
        f"{item.find('i').text}{list(item.children)[4]}"
    )
    result.url = f"{DvdCompare.BASE_URL}/comparisons/{link.get('href')}"
    result.tagline = link.get("title")

    # Parse QoL values
    label_matches = re.match(
        r"^(?P<title>[^()]+)\s(?P<the>\(The\))?\s?(?:\((?P<format>[^()]+)\))?\s?\((?P<year>[0-9]{4})\)$",
        result.label,
        re.IGNORECASE,
    )

    if label_matches is None:
        # re.match returns None for labels outside the expected layout (for
        # example extra parentheses in the title); fall back to the raw label
        # rather than failing on the subscripts below.
        result.title = result.label
        result.format = "DVD"
        result.year = 0
    else:
        result.format = label_matches["format"] or "DVD"
        result.year = int(label_matches["year"])
        result.title = label_matches["title"]
        if label_matches["the"]:
            # Labels carry the article as a "(The)" suffix; move it to the front.
            result.title = f"The {result.title}"

    result.fid = int(result.url.split("?fid=")[1])

    return result
async def search_async( async def search_async(
title: str | None, title: str | None,
@ -93,15 +120,7 @@ class DvdCompare:
if result_parent is None: if result_parent is None:
return [] # Nothing found return [] # Nothing found
all_results = [] return [DvdCompare._parse_result(i) for i in result_parent.find_all("li")]
for child in result_parent.find_all("li"):
next_result = DcSearchResult()
next_result.label = DvdCompare._reduce_spaces(child.strong.a.text)
next_result.description = DvdCompare._reduce_spaces(f"{child.find('i').text}{list(child.children)[4]}")
all_results.append(next_result)
return all_results
if __name__ == "__main__": if __name__ == "__main__":