1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
|
import feedparser
import PyRSS2Gen
import datetime
class FeedParser:
    """Convert feeds parsed by ``feedparser`` into ``PyRSS2Gen`` objects.

    ``parse`` is the entry point; the remaining methods are the building
    blocks it uses and can be called on their own.
    """

    @staticmethod
    def date_tuple_to_datetime(date_tuple):
        """Return a naive ``datetime`` built from a time tuple.

        Args:
            date_tuple: A sequence starting with (year, month, day[, hour,
                minute, second]); any further fields (e.g. the weekday,
                yearday and DST flag of a ``time.struct_time``) are ignored.

        Returns:
            The corresponding ``datetime.datetime``, or ``None`` when
            ``date_tuple`` is falsy (``None`` or empty).
        """
        if not date_tuple:
            return None
        # Take six fields so the seconds are preserved; slicing to five
        # would silently truncate every timestamp to the minute.
        return datetime.datetime(*date_tuple[:6])

    @staticmethod
    def get_first(lst):
        """Return the first element of ``lst``, or ``None`` if it is empty or ``None``."""
        return lst[0] if lst else None

    @staticmethod
    def get_keys(lst, key):
        """Collect ``element[key]`` for every element of ``lst`` that has ``key``.

        Args:
            lst: An iterable of mappings, or ``None``.
            key: The key to look up in each mapping.

        Returns:
            ``None`` when ``lst`` is ``None``; otherwise a list of the values
            found. Elements that do not contain ``key`` are skipped so that
            one incomplete element does not abort the whole conversion with
            a ``KeyError``.
        """
        if lst is None:
            return None
        return [e[key] for e in lst if key in e]

    def get_rss_item_for_entry(self, entry):
        """Build a ``PyRSS2Gen.RSSItem`` from one parsed feed entry.

        Args:
            entry: A mapping supporting ``.get`` (as used for the elements of
                ``feedparser``'s ``entries`` in ``parse``).

        Returns:
            The new ``RSSItem``; the original ``entry`` is attached to it as
            the ``source_entity`` attribute.
        """
        # NOTE(review): enclosure, guid and source are passed through as the
        # raw values found in the entry, not wrapped in PyRSS2Gen helper
        # types -- confirm this is sufficient if the item is ever serialized.
        rss_item = PyRSS2Gen.RSSItem(
            title=entry.get('title'),
            link=entry.get('link'),
            description=entry.get('description'),
            author=entry.get('author'),
            categories=self.get_keys(entry.get('tags'), 'term'),
            comments=entry.get('comments'),
            enclosure=self.get_first(entry.get('enclosures')),
            guid=entry.get('id'),
            pubDate=self.date_tuple_to_datetime(entry.get('published_parsed')),
            source=entry.get('source'),
        )
        # Keep a handle on the original entry for callers that need fields
        # that are not copied onto the RSS item.
        rss_item.source_entity = entry
        return rss_item

    def get_rss2_from_feed(self, feed, entries):
        """Build a ``PyRSS2Gen.RSS2`` channel from parsed feed data.

        Args:
            feed: A mapping supporting ``.get`` that holds the feed-level
                fields (``parse`` passes ``parsed_feed.feed``).
            entries: An iterable of entry mappings; each one is converted
                with ``get_rss_item_for_entry``.

        Returns:
            The new ``RSS2`` object; the original ``feed`` is attached to it
            as the ``source_entity`` attribute.
        """
        # NOTE(review): cloud, image and textInput are passed through as the
        # raw values found in the feed -- confirm this is sufficient if the
        # channel is ever serialized.
        rss_feed = PyRSS2Gen.RSS2(
            title=feed.get('title'),
            link=feed.get('link'),
            description=feed.get('subtitle'),
            language=feed.get('language'),
            copyright=feed.get('rights'),
            # Only the first contributor that has a name is used.
            managingEditor=self.get_first(
                self.get_keys(feed.get('contributors'), 'name')),
            webMaster=feed.get('publisher'),
            pubDate=self.date_tuple_to_datetime(feed.get('published_parsed')),
            lastBuildDate=self.date_tuple_to_datetime(
                feed.get('updated_parsed')),
            categories=self.get_keys(feed.get('tags'), 'term'),
            generator=feed.get('generator'),
            docs=feed.get('docs'),
            cloud=feed.get('cloud'),
            ttl=feed.get('ttl'),
            image=feed.get('image'),
            rating=None,
            textInput=feed.get('textinput'),
            skipHours=None,
            skipDays=None,
            items=[self.get_rss_item_for_entry(entry) for entry in entries],
        )
        rss_feed.source_entity = feed
        return rss_feed

    def parse(self, feed):
        """Parse ``feed`` with ``feedparser.parse`` and convert the result.

        Args:
            feed: Whatever ``feedparser.parse`` accepts; it is passed through
                unchanged.

        Returns:
            The ``PyRSS2Gen.RSS2`` object built by ``get_rss2_from_feed``.
        """
        parsed_feed = feedparser.parse(feed)
        return self.get_rss2_from_feed(parsed_feed.feed, parsed_feed.entries)
|