Refactored parsing of search results to cover all known cases for now
This commit is contained in:
parent
2eed3e1cdf
commit
022a41723b
1 changed file with 37 additions and 26 deletions
|
@ -57,37 +57,47 @@ class DvdCompare:
|
|||
# Parse QoL values
|
||||
result.fid = int(result.url.split("?fid=")[1])
|
||||
|
||||
label_matches = re.match(
|
||||
r"^(?P<titles>(?:[^()]+\s(?:\((?:The|A|An)\))?(?:(?P<formats>(?:\([^()]+\)\s?)*)\s)?(?:\s?AKA\s?)?)+)\s?\((?P<years>(?P<year>[0-9]{4})(?:[-,][0-9]{4})?)\)$",
|
||||
result.label.replace("", "-"),
|
||||
re.IGNORECASE,
|
||||
)
|
||||
result.formats = []
|
||||
result.titles = []
|
||||
for part in (
|
||||
result.label.replace("", "-").replace("\x96", "-").split(" AKA ")
|
||||
): # Iterate over alternative titles and collect information
|
||||
part_matches = re.match(
|
||||
r"^(?P<title>[^()\n]*[^()\s])(?:\s\((?P<prefix>The|A|An)\)(?P<suffix>[^()\n]+)?)?(?P<formats> ?(?:\([^()]*[a-z]+[^()]*\)\s?)+)?(?P<years>(?: ?\((?P<range>(?P<year>[0-9]{4})(?:[-,/][0-9]{1,2}(?:[0-9]{2})?)?)\)))?",
|
||||
part,
|
||||
re.IGNORECASE,
|
||||
)
|
||||
|
||||
if not label_matches:
|
||||
if part_matches is None:
|
||||
raise UnknownFormatError(
|
||||
f"Label part in unknown format that could not be parsed: {part}"
|
||||
)
|
||||
|
||||
# Title
|
||||
title = f"{part_matches['title']}{part_matches['suffix']}".strip()
|
||||
if part_matches["prefix"]:
|
||||
title = f"{part_matches['prefix']} {title}"
|
||||
result.titles.append(title)
|
||||
|
||||
# Formats
|
||||
if part_matches["formats"]:
|
||||
result.formats = [
|
||||
m
|
||||
for m in re.findall(
|
||||
r"\((?P<format>[^()]+)\)", part_matches["formats"]
|
||||
)
|
||||
]
|
||||
|
||||
# Years
|
||||
if part_matches["years"]:
|
||||
result.year = int(part_matches["year"])
|
||||
result.years = part_matches["years"]
|
||||
|
||||
if len(result.titles) <= 0:
|
||||
raise UnknownFormatError(
|
||||
f"Label in unknown format that could not be parsed: {result.label}"
|
||||
)
|
||||
|
||||
result.formats = []
|
||||
if label_matches["formats"]:
|
||||
result.formats = [
|
||||
m
|
||||
for m in re.findall(r"\((?P<format>[^()]+)\)", label_matches["formats"])
|
||||
]
|
||||
result.year = int(label_matches["year"])
|
||||
result.years = label_matches["years"]
|
||||
result.titles = []
|
||||
for raw_title in label_matches["titles"].strip().split(" AKA "):
|
||||
title_match = re.match(
|
||||
r"(?P<title>[^()]+[^() ])(?:\s\((?P<prefix>The|A|An)\))?(?:\s\([^()]+\))?",
|
||||
raw_title,
|
||||
re.IGNORECASE,
|
||||
)
|
||||
title = title_match["title"]
|
||||
if title_match["prefix"]:
|
||||
title = f"{title_match['prefix']} {title}"
|
||||
result.titles.append(title)
|
||||
|
||||
return result
|
||||
|
||||
async def search_async(
|
||||
|
@ -157,6 +167,7 @@ if __name__ == "__main__":
|
|||
results = await DvdCompare.search_async("The")
|
||||
if results:
|
||||
print(results[0])
|
||||
print(f"And {len(results)} additional results")
|
||||
else:
|
||||
print("No results found.")
|
||||
|
||||
|
|
Loading…
Reference in a new issue