diff --git a/src/dvd-compare-python/dvd_compare.py b/src/dvd-compare-python/dvd_compare.py index 387e67e..156a51e 100644 --- a/src/dvd-compare-python/dvd_compare.py +++ b/src/dvd-compare-python/dvd_compare.py @@ -13,8 +13,10 @@ class DcSearchResult: """Year of media parsed from label. Parsing has not been tested on all cases.""" self.title: str """Title of media parsed from label. Parsing has not been tested on all cases.""" + self.tagline: str + """Unprocessed tagline returned by search.""" self.format: str - """Format (DVD, Blu-Ray, etc.) parsed from label. Parsing has not been tested on all cases.""" + """Format (DVD, Blu-Ray, etc.) parsed from label. Defaults to DVD if no explicit format is given. Parsing has not been tested on all cases.""" self.fid: int """FID, used to identify media by dvdcompare.net, parsed from URL. Can be used to retrive further details.""" self.url: str @@ -23,7 +25,7 @@ class DcSearchResult: """Description, listing available publications to label, as returned by search.""" def __repr__(self): - return f"{self.label}\n{self.description}\n" + return f"{self.label}\n{self.title} | {self.format} | {self.year}\n{self.tagline}\n[{self.fid} | {self.url}]\n{self.description}\n" class DvdCompare: @@ -33,10 +35,35 @@ class DvdCompare: """ Can be adjusted, if the domain should ever be changed.""" def _reduce_spaces(text: str) -> str: - return re.sub(r"\s+", " ", text) - - def _get_title(label: str) -> str: - + return re.sub(r"\s+", " ", text).strip() + + def _parse_result(item) -> DcSearchResult: + result = DcSearchResult() + + # Get direct values + result.label = DvdCompare._reduce_spaces(item.strong.a.text) + result.description = DvdCompare._reduce_spaces( + f"{item.find('i').text}{list(item.children)[4]}" + ) + result.url = f"{DvdCompare.BASE_URL}/comparisons/{item.strong.a.get('href')}" + result.tagline = item.strong.a.get("title") + + # Parse QoL values + label_matches = re.match( + r"^(?P