E-Paper-Calendar/Calendar/RssParserPosts.py

from RssInterface import RssInterface
from datetime import datetime, timedelta, date
import feedparser
import RssPost
from urllib.request import urlopen

# Posts older than this many days are discarded.
max_range_days = 14


class RssParserPosts(RssInterface):
    """Fetches posts from a list of feed URLs via the feedparser RSS parser."""

    def __init__(self, urls):
        self.urls = urls
        super(RssParserPosts, self).__init__()

    def is_available(self):
        # Reachability check: try to open the first configured feed URL.
        try:
            if not self.urls:
                return False
            urlopen(self.urls[0])
            return True
        except Exception:
            return False

    def __get_posts__(self):
        posts = []
        today = date.today()
        # Oldest publication date that is still included.
        time_span = today - timedelta(days=max_range_days)
        for feed_url in self.urls:
            parsed_feed = feedparser.parse(feed_url)
            for post in parsed_feed.entries:
                parsed_post = self.__parse_post__(post)
                # Keep only posts published within the last max_range_days days.
                if parsed_post.datetime.date() >= time_span:
                    posts.append(parsed_post)
        return posts

    def __parse_post__(self, post):
        parsed_post = RssPost.RssPost()
        parsed_post.fetch_datetime = datetime.now()
        parsed_post.title = post.title
        parsed_post.description = post.description
        parsed_post.source = self.__get_webpage__(post.link)
        # published_parsed is a time.struct_time; its first six fields are
        # year, month, day, hour, minute and second.
        parsed_post.datetime = datetime(*post.published_parsed[:6])
        return parsed_post

    def __get_webpage__(self, link):
        # Extract the host part of the link,
        # e.g. "example.com" from "https://example.com/feed.xml".
        start_index = link.find('://') + 3
        end_index = link.find('/', start_index)
        if end_index == -1:
            end_index = len(link)
        return link[start_index:end_index]
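

# A minimal usage sketch, not part of the original file. The feed URL below is
# a placeholder, and calling __get_posts__() directly is only for illustration;
# in the E-Paper-Calendar project the method is presumably invoked through the
# RssInterface base class.
if __name__ == "__main__":
    parser = RssParserPosts(["https://example.com/feed.xml"])
    if parser.is_available():
        for rss_post in parser.__get_posts__():
            print(rss_post.title, rss_post.source, rss_post.datetime)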