diff options
author | Pacien TRAN-GIRARD | 2015-08-11 01:37:11 +0200 |
---|---|---|
committer | Pacien TRAN-GIRARD | 2015-08-11 12:35:27 +0200 |
commit | 45b8d0637a0669c3281fc0449b08a93ea0b7a809 (patch) | |
tree | 654cdd9948c176ec7259acb4b90b8d44b6b76274 /afeedprocessor | |
download | afeedprocessor-45b8d0637a0669c3281fc0449b08a93ea0b7a809.tar.gz |
First (working) version
Diffstat (limited to 'afeedprocessor')
-rw-r--r-- | afeedprocessor/afeedparser.py | 59 | ||||
-rw-r--r-- | afeedprocessor/afeedprocessor.py | 96 | ||||
-rw-r--r-- | afeedprocessor/anitemprocessor.py | 47 |
3 files changed, 202 insertions, 0 deletions
diff --git a/afeedprocessor/afeedparser.py b/afeedprocessor/afeedparser.py new file mode 100644 index 0000000..0c23187 --- /dev/null +++ b/afeedprocessor/afeedparser.py | |||
@@ -0,0 +1,59 @@ | |||
1 | import feedparser | ||
2 | import PyRSS2Gen | ||
3 | import datetime | ||
4 | |||
5 | |||
class FeedParser:
    """Parse a feed with ``feedparser`` and rebuild it as a ``PyRSS2Gen.RSS2``."""

    @staticmethod
    def date_tuple_to_datetime(date_tuple):
        """Convert a time tuple into a ``datetime.datetime``.

        :param date_tuple: a sequence whose leading fields are year, month,
            day, hour, minute and second (extra trailing fields are ignored),
            or a falsy value.
        :return: the matching naive ``datetime``, or ``None`` when
            ``date_tuple`` is falsy.
        """
        # Keep six fields, not five: slicing at 5 silently dropped the seconds.
        return datetime.datetime(*date_tuple[:6]) if date_tuple else None

    @staticmethod
    def get_first(lst):
        """Return the first element of ``lst``, or ``None`` if it is empty or falsy."""
        return lst[0] if lst else None

    def get_rss_item_for_entry(self, entry):
        """Build a ``PyRSS2Gen.RSSItem`` from one parsed feed entry.

        Missing keys are passed on as ``None`` (``entry.get`` default).
        """
        # NOTE(review): 'tags' and 'enclosures' are forwarded as returned by
        # feedparser; confirm that PyRSS2Gen can serialise those values.
        return PyRSS2Gen.RSSItem(
            title=entry.get('title'),
            link=entry.get('link'),
            description=entry.get('description'),
            author=entry.get('author'),
            categories=entry.get('tags'),
            comments=entry.get('comments'),
            # Only the first enclosure of the entry is kept.
            enclosure=self.get_first(entry.get('enclosures')),
            guid=entry.get('id'),
            pubDate=self.date_tuple_to_datetime(entry.get('published_parsed')),
            source=entry.get('source'),
        )

    def get_rss2_from_feed(self, feed, entries):
        """Build a ``PyRSS2Gen.RSS2`` from parsed feed metadata and its entries.

        :param feed: mapping-like object holding the feed-level fields.
        :param entries: iterable of parsed entries, each converted with
            :meth:`get_rss_item_for_entry`.
        """
        return PyRSS2Gen.RSS2(
            title=feed.get('title'),
            link=feed.get('link'),
            description=feed.get('subtitle'),

            language=feed.get('language'),
            copyright=feed.get('rights'),
            managingEditor=feed.get('contributors'),
            webMaster=feed.get('publisher'),
            pubDate=self.date_tuple_to_datetime(feed.get('published_parsed')),
            lastBuildDate=self.date_tuple_to_datetime(feed.get('updated_parsed')),

            categories=feed.get('tags'),
            generator=feed.get('generator'),
            docs=feed.get('docs'),
            cloud=feed.get('cloud'),
            ttl=feed.get('ttl'),

            image=feed.get('image'),
            # rating, skipHours and skipDays are not read from the parsed feed.
            rating=None,
            textInput=feed.get('textinput'),
            skipHours=None,
            skipDays=None,

            items=[self.get_rss_item_for_entry(entry) for entry in entries],
        )

    def parse(self, feed):
        """Parse ``feed`` with ``feedparser.parse`` and return it as an RSS2 object."""
        parsed_feed = feedparser.parse(feed)
        return self.get_rss2_from_feed(parsed_feed.feed, parsed_feed.entries)
diff --git a/afeedprocessor/afeedprocessor.py b/afeedprocessor/afeedprocessor.py new file mode 100644 index 0000000..e3663ee --- /dev/null +++ b/afeedprocessor/afeedprocessor.py | |||
@@ -0,0 +1,96 @@ | |||
1 | import PyRSS2Gen | ||
2 | |||
3 | from afeedprocessor.anitemprocessor import ItemProcessor | ||
4 | |||
5 | |||
class FeedProcessor:
    """Rebuild an RSS2 feed field by field through overridable hooks.

    Every ``get_*`` hook receives the current value of one channel field
    together with the whole source feed and returns the value to use in the
    rebuilt feed. The default hooks return the value unchanged; items are
    delegated to the configured item processor.
    """

    def __init__(self, item_processor: ItemProcessor=None):
        """Store ``item_processor``, or a default ``ItemProcessor`` when omitted."""
        self.item_processor = (
            ItemProcessor() if item_processor is None else item_processor
        )

    def get_title(self, title, feed):
        """Hook for the channel title; returns it unchanged."""
        return title

    def get_link(self, link, feed):
        """Hook for the channel link; returns it unchanged."""
        return link

    def get_description(self, description, feed):
        """Hook for the channel description; returns it unchanged."""
        return description

    def get_language(self, language, feed):
        """Hook for the channel language; returns it unchanged."""
        return language

    def get_copyright(self, copyright, feed):
        """Hook for the copyright notice; returns it unchanged."""
        return copyright

    def get_managing_editor(self, managing_editor, feed):
        """Hook for the managing editor; returns it unchanged."""
        return managing_editor

    def get_web_master(self, web_master, feed):
        """Hook for the web master; returns it unchanged."""
        return web_master

    def get_pub_date(self, pub_date, feed):
        """Hook for the publication date; returns it unchanged."""
        return pub_date

    def get_last_build_date(self, last_build_date, feed):
        """Hook for the last build date; returns it unchanged."""
        return last_build_date

    def get_categories(self, categories, feed):
        """Hook for the channel categories; returns them unchanged."""
        return categories

    def get_generator(self, generator, feed):
        """Hook for the generator field; returns it unchanged."""
        return generator

    def get_docs(self, docs, feed):
        """Hook for the docs field; returns it unchanged."""
        return docs

    def get_cloud(self, cloud, feed):
        """Hook for the cloud field; returns it unchanged."""
        return cloud

    def get_ttl(self, ttl, feed):
        """Hook for the ttl field; returns it unchanged."""
        return ttl

    def get_image(self, image, feed):
        """Hook for the channel image; returns it unchanged."""
        return image

    def get_rating(self, rating, feed):
        """Hook for the rating field; returns it unchanged."""
        return rating

    def get_text_input(self, text_input, feed):
        """Hook for the text input field; returns it unchanged."""
        return text_input

    def get_skip_hours(self, skip_hours, feed):
        """Hook for the skip-hours field; returns it unchanged."""
        return skip_hours

    def get_skip_days(self, skip_days, feed):
        """Hook for the skip-days field; returns it unchanged."""
        return skip_days

    def get_items(self, items, feed):
        """Run every item through the item processor and return the new list."""
        processed = []
        for item in items:
            processed.append(self.item_processor.process(item))
        return processed

    def process(self, feed: PyRSS2Gen.RSS2):
        """Return a new ``PyRSS2Gen.RSS2`` built from the hooks' results.

        The hooks are called in the order the fields are listed below.
        """
        channel_fields = {
            'title': self.get_title(feed.title, feed),
            'link': self.get_link(feed.link, feed),
            'description': self.get_description(feed.description, feed),
            'language': self.get_language(feed.language, feed),
            'copyright': self.get_copyright(feed.copyright, feed),
            'managingEditor': self.get_managing_editor(feed.managingEditor, feed),
            'webMaster': self.get_web_master(feed.webMaster, feed),
            'pubDate': self.get_pub_date(feed.pubDate, feed),
            'lastBuildDate': self.get_last_build_date(feed.lastBuildDate, feed),
            'categories': self.get_categories(feed.categories, feed),
            'generator': self.get_generator(feed.generator, feed),
            'docs': self.get_docs(feed.docs, feed),
            'cloud': self.get_cloud(feed.cloud, feed),
            'ttl': self.get_ttl(feed.ttl, feed),
            'image': self.get_image(feed.image, feed),
            'rating': self.get_rating(feed.rating, feed),
            'textInput': self.get_text_input(feed.textInput, feed),
            'skipHours': self.get_skip_hours(feed.skipHours, feed),
            'skipDays': self.get_skip_days(feed.skipDays, feed),
            'items': self.get_items(feed.items, feed),
        }
        return PyRSS2Gen.RSS2(**channel_fields)
diff --git a/afeedprocessor/anitemprocessor.py b/afeedprocessor/anitemprocessor.py new file mode 100644 index 0000000..3f0eabc --- /dev/null +++ b/afeedprocessor/anitemprocessor.py | |||
@@ -0,0 +1,47 @@ | |||
1 | import PyRSS2Gen | ||
2 | |||
3 | |||
class ItemProcessor:
    """Rebuild an RSS item field by field through overridable hooks.

    Every ``get_*`` hook receives the current value of one item field
    together with the whole source item and returns the value to use in the
    rebuilt item. The default hooks return the value unchanged.
    """

    def get_title(self, title, item):
        """Hook for the item title; returns it unchanged."""
        return title

    def get_link(self, link, item):
        """Hook for the item link; returns it unchanged."""
        return link

    def get_description(self, description, item):
        """Hook for the item description; returns it unchanged."""
        return description

    def get_author(self, author, item):
        """Hook for the item author; returns it unchanged."""
        return author

    def get_categories(self, categories, item):
        """Hook for the item categories; returns them unchanged."""
        return categories

    def get_comments(self, comments, item):
        """Hook for the comments field; returns it unchanged."""
        return comments

    def get_enclosure(self, enclosure, item):
        """Hook for the item enclosure; returns it unchanged."""
        return enclosure

    def get_guid(self, guid, item):
        """Hook for the item guid; returns it unchanged."""
        return guid

    def get_pub_date(self, pub_date, item):
        """Hook for the publication date; returns it unchanged."""
        return pub_date

    def get_source(self, source, item):
        """Hook for the item source; returns it unchanged."""
        return source

    def process(self, item: PyRSS2Gen.RSSItem):
        """Return a new ``PyRSS2Gen.RSSItem`` built from the hooks' results.

        The hooks are called in the order the fields are listed below.
        """
        item_fields = {
            'title': self.get_title(item.title, item),
            'link': self.get_link(item.link, item),
            'description': self.get_description(item.description, item),
            'author': self.get_author(item.author, item),
            'categories': self.get_categories(item.categories, item),
            'comments': self.get_comments(item.comments, item),
            'enclosure': self.get_enclosure(item.enclosure, item),
            'guid': self.get_guid(item.guid, item),
            'pubDate': self.get_pub_date(item.pubDate, item),
            'source': self.get_source(item.source, item),
        }
        return PyRSS2Gen.RSSItem(**item_fields)