diff --git a/src/dvd-compare-python/dvd_compare.py b/src/dvd-compare-python/dvd_compare.py index 387e67e..156a51e 100644 --- a/src/dvd-compare-python/dvd_compare.py +++ b/src/dvd-compare-python/dvd_compare.py @@ -13,8 +13,10 @@ class DcSearchResult: """Year of media parsed from label. Parsing has not been tested on all cases.""" self.title: str """Title of media parsed from label. Parsing has not been tested on all cases.""" + self.tagline: str + """Unprocessed tagline returned by search.""" self.format: str - """Format (DVD, Blu-Ray, etc.) parsed from label. Parsing has not been tested on all cases.""" + """Format (DVD, Blu-Ray, etc.) parsed from label. Defaults to DVD if no explicit format is given. Parsing has not been tested on all cases.""" self.fid: int """FID, used to identify media by dvdcompare.net, parsed from URL. Can be used to retrive further details.""" self.url: str @@ -23,7 +25,7 @@ class DcSearchResult: """Description, listing available publications to label, as returned by search.""" def __repr__(self): - return f"{self.label}\n{self.description}\n" + return f"{self.label}\n{self.title} | {self.format} | {self.year}\n{self.tagline}\n[{self.fid} | {self.url}]\n{self.description}\n" class DvdCompare: @@ -33,10 +35,35 @@ class DvdCompare: """ Can be adjusted, if the domain should ever be changed.""" def _reduce_spaces(text: str) -> str: - return re.sub(r"\s+", " ", text) - - def _get_title(label: str) -> str: - + return re.sub(r"\s+", " ", text).strip() + + def _parse_result(item) -> DcSearchResult: + result = DcSearchResult() + + # Get direct values + result.label = DvdCompare._reduce_spaces(item.strong.a.text) + result.description = DvdCompare._reduce_spaces( + f"{item.find('i').text}{list(item.children)[4]}" + ) + result.url = f"{DvdCompare.BASE_URL}/comparisons/{item.strong.a.get('href')}" + result.tagline = item.strong.a.get("title") + + # Parse QoL values + label_matches = re.match( + 
r"^(?P<title>[^()]+)\s(?P<the>\(The\))?\s?(?:\((?P<format>[^()]+)\))?\s?\((?P<year>[0-9]{4})\)$", + result.label, + re.IGNORECASE, + ) + + result.format = label_matches["format"] if label_matches["format"] else "DVD" + result.year = int(label_matches["year"]) + result.title = label_matches["title"] + if label_matches["the"]: + result.title = f"The {result.title}" + + result.fid = int(result.url.split("?fid=")[1]) + + return result async def search_async( title: str | None, @@ -93,15 +120,7 @@ class DvdCompare: if result_parent is None: return [] # Nothing found - all_results = [] - for child in result_parent.find_all("li"): - next_result = DcSearchResult() - next_result.label = DvdCompare._reduce_spaces(child.strong.a.text) - next_result.description = DvdCompare._reduce_spaces(f"{child.find('i').text}{list(child.children)[4]}") - - all_results.append(next_result) - - return all_results + return [DvdCompare._parse_result(i) for i in result_parent.find_all("li")] if __name__ == "__main__":