"""RSS post source that fetches and filters feed entries via ``feedparser``."""
# Provenance: extracted from patch bf699333824f669015cabf22b1866bbac262c8a3
# ("Implemented feedparser as RssSource", Max G, Mon, 4 Mar 2019 20:09:11 +0100),
# which creates Calendar/RssParserPosts.py.

from datetime import datetime, timedelta, date

import arrow
import feedparser

import RssPost
from RssInterface import RssInterface

# Posts older than this many days (relative to today) are discarded.
max_range_days = 14


class RssParserPosts(RssInterface):
    """Fetches posts from url-addresses via rss parser."""

    def __init__(self, urls):
        """Store the feed URLs, then run the base-class initialiser.

        ``urls`` is an iterable of feed addresses handed to
        ``feedparser.parse`` one at a time. It is assigned before
        ``super().__init__()`` runs, so it is already set if the base
        initialiser uses it.
        """
        self.urls = urls
        super(RssParserPosts, self).__init__()

    def __get_posts__(self):
        """Return the parsed posts published within the last ``max_range_days`` days.

        Side effect: ``self.time_span`` is set to the oldest accepted date.
        Entries for which no publication date could be determined are
        skipped, because they cannot be compared against the cut-off.
        """
        self.time_span = date.today() - timedelta(days=max_range_days)

        posts = []
        for url in self.urls:
            feed = feedparser.parse(url)
            for entry in feed.entries:
                parsed_post = self.__parse_post__(entry)
                if parsed_post.datetime is None:
                    # Undated entry: cannot tell whether it is recent enough.
                    continue
                if parsed_post.datetime.date() >= self.time_span:
                    posts.append(parsed_post)
        return posts

    def __parse_post__(self, post):
        """Convert one feedparser entry into an ``RssPost.RssPost``.

        Feed items are not required to carry every element, so title,
        description and link default to ``''`` when absent. ``datetime`` is
        built from ``published_parsed``, falling back to ``updated_parsed``;
        it is ``None`` when the entry has no usable date.
        """
        parsed_post = RssPost.RssPost()
        parsed_post.fetch_datetime = datetime.now()

        parsed_post.title = getattr(post, 'title', '')
        parsed_post.description = getattr(post, 'description', '')
        parsed_post.source = self.__get_webpage__(getattr(post, 'link', ''))

        # The *_parsed attributes are time tuples; they are missing when the
        # feed gives no date and None when the date could not be parsed.
        stamp = (getattr(post, 'published_parsed', None)
                 or getattr(post, 'updated_parsed', None))
        parsed_post.datetime = datetime(*stamp[:6]) if stamp else None

        return parsed_post

    def __get_webpage__(self, link):
        """Return the host part of ``link`` (the text between ``://`` and the next ``/``).

        Links without a path (``https://example.com``) yield the full host,
        and links without a scheme are treated as starting at the host.
        An empty link yields ``''``.
        """
        _, separator, remainder = link.partition('://')
        if not separator:
            # No scheme present: the whole link is "host[/path]".
            remainder = link
        return remainder.split('/', 1)[0]