aboutsummaryrefslogtreecommitdiff
path: root/afeedprocessor
diff options
context:
space:
mode:
author Pacien TRAN-GIRARD 2015-08-11 01:37:11 +0200
committer Pacien TRAN-GIRARD 2015-08-11 12:35:27 +0200
commit 45b8d0637a0669c3281fc0449b08a93ea0b7a809 (patch)
tree 654cdd9948c176ec7259acb4b90b8d44b6b76274 /afeedprocessor
download afeedprocessor-45b8d0637a0669c3281fc0449b08a93ea0b7a809.tar.gz
First (working) version
Diffstat (limited to 'afeedprocessor')
-rw-r--r-- afeedprocessor/afeedparser.py 59
-rw-r--r-- afeedprocessor/afeedprocessor.py 96
-rw-r--r-- afeedprocessor/anitemprocessor.py 47
3 files changed, 202 insertions, 0 deletions
diff --git a/afeedprocessor/afeedparser.py b/afeedprocessor/afeedparser.py
new file mode 100644
index 0000000..0c23187
--- /dev/null
+++ b/afeedprocessor/afeedparser.py
@@ -0,0 +1,59 @@
1import feedparser
2import PyRSS2Gen
3import datetime
4
5
class FeedParser:
    """Parse a feed with ``feedparser`` and re-express it as PyRSS2Gen objects."""

    @staticmethod
    def date_tuple_to_datetime(date_tuple):
        """Convert a time tuple into a naive ``datetime``.

        :param date_tuple: sequence starting with (year, month, day, hour,
            minute, second), e.g. a ``time.struct_time``; may be ``None``.
        :return: the matching ``datetime.datetime``, or ``None`` when
            ``date_tuple`` is falsy.
        """
        if not date_tuple:
            return None
        # The first six fields run through the seconds; slicing at five
        # would silently truncate every timestamp to the minute.
        return datetime.datetime(*date_tuple[:6])

    @staticmethod
    def get_first(lst):
        """Return the first element of ``lst``, or ``None`` if it is empty or ``None``."""
        return lst[0] if lst else None

    def get_rss_item_for_entry(self, entry):
        """Build a ``PyRSS2Gen.RSSItem`` from one parsed feed entry.

        :param entry: mapping-like entry; missing keys yield ``None`` fields.
        :return: the populated ``PyRSS2Gen.RSSItem``.
        """
        # NOTE(review): 'tags', 'enclosures' and 'source' are forwarded as
        # found in the entry; presumably PyRSS2Gen expects its own
        # Category/Enclosure/Source types here -- confirm before publishing.
        return PyRSS2Gen.RSSItem(
            title=entry.get('title'),
            link=entry.get('link'),
            description=entry.get('description'),
            author=entry.get('author'),
            categories=entry.get('tags'),
            comments=entry.get('comments'),
            enclosure=self.get_first(entry.get('enclosures')),
            guid=entry.get('id'),
            pubDate=self.date_tuple_to_datetime(entry.get('published_parsed')),
            source=entry.get('source'),
        )

    def get_rss2_from_feed(self, feed, entries):
        """Build a ``PyRSS2Gen.RSS2`` from parsed feed metadata and entries.

        :param feed: mapping-like feed metadata; missing keys yield ``None``.
        :param entries: iterable of entries, each converted with
            :meth:`get_rss_item_for_entry`.
        :return: the populated ``PyRSS2Gen.RSS2``.
        """
        return PyRSS2Gen.RSS2(
            title=feed.get('title'),
            link=feed.get('link'),
            description=feed.get('subtitle'),

            language=feed.get('language'),
            copyright=feed.get('rights'),
            managingEditor=feed.get('contributors'),
            webMaster=feed.get('publisher'),
            pubDate=self.date_tuple_to_datetime(feed.get('published_parsed')),
            lastBuildDate=self.date_tuple_to_datetime(feed.get('updated_parsed')),

            categories=feed.get('tags'),
            generator=feed.get('generator'),
            docs=feed.get('docs'),
            cloud=feed.get('cloud'),
            ttl=feed.get('ttl'),

            image=feed.get('image'),
            # These three have no counterpart read from the parsed feed.
            rating=None,
            textInput=feed.get('textinput'),
            skipHours=None,
            skipDays=None,

            items=[self.get_rss_item_for_entry(entry) for entry in entries],
        )

    def parse(self, feed):
        """Parse ``feed`` with ``feedparser.parse`` and return it as a ``PyRSS2Gen.RSS2``.

        :param feed: whatever ``feedparser.parse`` accepts; passed through
            unchanged.
        """
        parsed_feed = feedparser.parse(feed)
        return self.get_rss2_from_feed(parsed_feed.feed, parsed_feed.entries)
diff --git a/afeedprocessor/afeedprocessor.py b/afeedprocessor/afeedprocessor.py
new file mode 100644
index 0000000..e3663ee
--- /dev/null
+++ b/afeedprocessor/afeedprocessor.py
@@ -0,0 +1,96 @@
1import PyRSS2Gen
2
3from afeedprocessor.anitemprocessor import ItemProcessor
4
5
class FeedProcessor:
    """Rebuild an RSS2 feed field by field, routing each value through a hook.

    Each ``get_*`` hook receives the current value of one field together
    with the whole source feed and returns the value to place in the rebuilt
    feed. The hooks defined here return their input unchanged, except
    :meth:`get_items`, which runs every item through the item processor.
    """

    def __init__(self, item_processor: ItemProcessor=None):
        """Keep ``item_processor``, or a fresh ``ItemProcessor`` when it is ``None``."""
        self.item_processor = (
            ItemProcessor() if item_processor is None else item_processor
        )

    def get_title(self, title, feed):
        """Return the title for the rebuilt feed (unchanged here)."""
        return title

    def get_link(self, link, feed):
        """Return the link for the rebuilt feed (unchanged here)."""
        return link

    def get_description(self, description, feed):
        """Return the description for the rebuilt feed (unchanged here)."""
        return description

    def get_language(self, language, feed):
        """Return the language for the rebuilt feed (unchanged here)."""
        return language

    def get_copyright(self, copyright, feed):
        """Return the copyright for the rebuilt feed (unchanged here)."""
        return copyright

    def get_managing_editor(self, managing_editor, feed):
        """Return the managing editor for the rebuilt feed (unchanged here)."""
        return managing_editor

    def get_web_master(self, web_master, feed):
        """Return the web master for the rebuilt feed (unchanged here)."""
        return web_master

    def get_pub_date(self, pub_date, feed):
        """Return the publication date for the rebuilt feed (unchanged here)."""
        return pub_date

    def get_last_build_date(self, last_build_date, feed):
        """Return the last build date for the rebuilt feed (unchanged here)."""
        return last_build_date

    def get_categories(self, categories, feed):
        """Return the categories for the rebuilt feed (unchanged here)."""
        return categories

    def get_generator(self, generator, feed):
        """Return the generator for the rebuilt feed (unchanged here)."""
        return generator

    def get_docs(self, docs, feed):
        """Return the docs value for the rebuilt feed (unchanged here)."""
        return docs

    def get_cloud(self, cloud, feed):
        """Return the cloud value for the rebuilt feed (unchanged here)."""
        return cloud

    def get_ttl(self, ttl, feed):
        """Return the ttl for the rebuilt feed (unchanged here)."""
        return ttl

    def get_image(self, image, feed):
        """Return the image for the rebuilt feed (unchanged here)."""
        return image

    def get_rating(self, rating, feed):
        """Return the rating for the rebuilt feed (unchanged here)."""
        return rating

    def get_text_input(self, text_input, feed):
        """Return the text input for the rebuilt feed (unchanged here)."""
        return text_input

    def get_skip_hours(self, skip_hours, feed):
        """Return the skip-hours value for the rebuilt feed (unchanged here)."""
        return skip_hours

    def get_skip_days(self, skip_days, feed):
        """Return the skip-days value for the rebuilt feed (unchanged here)."""
        return skip_days

    def get_items(self, items, feed):
        """Return a list with each of ``items`` passed through the item processor."""
        processed = []
        for item in items:
            processed.append(self.item_processor.process(item))
        return processed

    def process(self, feed: PyRSS2Gen.RSS2):
        """Return a new ``PyRSS2Gen.RSS2`` built from ``feed`` via the hooks.

        Hooks are evaluated in the order the fields are listed below, each
        one given the field's current value and the source ``feed``.
        """
        fields = {
            'title': self.get_title(feed.title, feed),
            'link': self.get_link(feed.link, feed),
            'description': self.get_description(feed.description, feed),
            'language': self.get_language(feed.language, feed),
            'copyright': self.get_copyright(feed.copyright, feed),
            'managingEditor': self.get_managing_editor(feed.managingEditor, feed),
            'webMaster': self.get_web_master(feed.webMaster, feed),
            'pubDate': self.get_pub_date(feed.pubDate, feed),
            'lastBuildDate': self.get_last_build_date(feed.lastBuildDate, feed),
            'categories': self.get_categories(feed.categories, feed),
            'generator': self.get_generator(feed.generator, feed),
            'docs': self.get_docs(feed.docs, feed),
            'cloud': self.get_cloud(feed.cloud, feed),
            'ttl': self.get_ttl(feed.ttl, feed),
            'image': self.get_image(feed.image, feed),
            'rating': self.get_rating(feed.rating, feed),
            'textInput': self.get_text_input(feed.textInput, feed),
            'skipHours': self.get_skip_hours(feed.skipHours, feed),
            'skipDays': self.get_skip_days(feed.skipDays, feed),
            'items': self.get_items(feed.items, feed),
        }
        return PyRSS2Gen.RSS2(**fields)
diff --git a/afeedprocessor/anitemprocessor.py b/afeedprocessor/anitemprocessor.py
new file mode 100644
index 0000000..3f0eabc
--- /dev/null
+++ b/afeedprocessor/anitemprocessor.py
@@ -0,0 +1,47 @@
1import PyRSS2Gen
2
3
class ItemProcessor:
    """Rebuild an RSS item field by field, routing each value through a hook.

    Each ``get_*`` hook receives the current value of one field together
    with the whole source item and returns the value to place in the rebuilt
    item. All hooks defined here return their input unchanged.
    """

    def get_title(self, title, item):
        """Return the title for the rebuilt item (unchanged here)."""
        return title

    def get_link(self, link, item):
        """Return the link for the rebuilt item (unchanged here)."""
        return link

    def get_description(self, description, item):
        """Return the description for the rebuilt item (unchanged here)."""
        return description

    def get_author(self, author, item):
        """Return the author for the rebuilt item (unchanged here)."""
        return author

    def get_categories(self, categories, item):
        """Return the categories for the rebuilt item (unchanged here)."""
        return categories

    def get_comments(self, comments, item):
        """Return the comments value for the rebuilt item (unchanged here)."""
        return comments

    def get_enclosure(self, enclosure, item):
        """Return the enclosure for the rebuilt item (unchanged here)."""
        return enclosure

    def get_guid(self, guid, item):
        """Return the guid for the rebuilt item (unchanged here)."""
        return guid

    def get_pub_date(self, pub_date, item):
        """Return the publication date for the rebuilt item (unchanged here)."""
        return pub_date

    def get_source(self, source, item):
        """Return the source for the rebuilt item (unchanged here)."""
        return source

    def process(self, item: PyRSS2Gen.RSSItem):
        """Return a new ``PyRSS2Gen.RSSItem`` built from ``item`` via the hooks.

        Hooks are evaluated in the order the fields are listed below, each
        one given the field's current value and the source ``item``.
        """
        fields = {
            'title': self.get_title(item.title, item),
            'link': self.get_link(item.link, item),
            'description': self.get_description(item.description, item),
            'author': self.get_author(item.author, item),
            'categories': self.get_categories(item.categories, item),
            'comments': self.get_comments(item.comments, item),
            'enclosure': self.get_enclosure(item.enclosure, item),
            'guid': self.get_guid(item.guid, item),
            'pubDate': self.get_pub_date(item.pubDate, item),
            'source': self.get_source(item.source, item),
        }
        return PyRSS2Gen.RSSItem(**fields)