aboutsummaryrefslogtreecommitdiff
path: root/afeedprocessor/afeedparser.py
blob: 3e7653b68b8dcbf472cad1ce9b512ea9ba58ca7e (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
import feedparser
import PyRSS2Gen
import datetime


class FeedParser:
    """Parse a feed with ``feedparser`` and re-express it as PyRSS2Gen objects.

    ``parse`` is the entry point; the remaining methods are the conversion
    helpers it relies on. Every generated ``RSS2`` / ``RSSItem`` keeps a
    reference to the feedparser structure it was built from in its
    ``source_entity`` attribute.
    """

    @staticmethod
    def date_tuple_to_datetime(date_tuple):
        """Convert a time tuple into a naive ``datetime.datetime``.

        :param date_tuple: sequence starting with (year, month, day, hour,
            minute, second, ...), such as the ``*_parsed`` values looked up
            by this class; may be ``None`` or empty.
        :return: the matching ``datetime.datetime``, or ``None`` when no
            tuple was supplied.
        """
        if not date_tuple:
            return None
        # Six leading fields are needed to keep the seconds; slicing to five
        # would silently truncate every timestamp to the minute.
        return datetime.datetime(*date_tuple[:6])

    @staticmethod
    def get_first(lst):
        """Return the first element of ``lst``, or ``None`` if it is empty or ``None``."""
        return lst[0] if lst else None

    @staticmethod
    def get_terms(lst):
        """Collect the ``'term'`` value of every tag mapping in ``lst``.

        :param lst: iterable of dict-like tag objects, or ``None``.
        :return: ``None`` when ``lst`` is ``None``; otherwise a list of the
            terms, skipping tags whose term is missing or ``None`` so that a
            single incomplete tag does not abort the whole conversion.
        """
        if lst is None:
            return None
        terms = (tag.get('term') for tag in lst)
        return [term for term in terms if term is not None]

    def get_rss_item_for_entry(self, entry):
        """Build a ``PyRSS2Gen.RSSItem`` from one feed entry.

        :param entry: dict-like entry; absent keys simply become ``None``.
        :return: the ``RSSItem``, with ``source_entity`` set to ``entry``.
        """
        # NOTE(review): 'enclosures' elements and 'source' are forwarded
        # exactly as found in the entry. If they are plain dicts, PyRSS2Gen
        # presumably cannot serialize them as-is -- confirm before relying
        # on XML output of these fields.
        rss_item = PyRSS2Gen.RSSItem(
            title=entry.get('title'),
            link=entry.get('link'),
            description=entry.get('description'),
            author=entry.get('author'),
            categories=self.get_terms(entry.get('tags')),
            comments=entry.get('comments'),
            enclosure=self.get_first(entry.get('enclosures')),
            guid=entry.get('id'),
            pubDate=self.date_tuple_to_datetime(entry.get('published_parsed')),
            source=entry.get('source'),
        )

        # Keep the original entry reachable from the generated item.
        rss_item.source_entity = entry
        return rss_item

    def get_rss2_from_feed(self, feed, entries):
        """Build a ``PyRSS2Gen.RSS2`` channel from feed metadata and entries.

        :param feed: dict-like feed-level metadata.
        :param entries: iterable of dict-like entries; each one is converted
            with ``get_rss_item_for_entry``.
        :return: the ``RSS2`` object, with ``source_entity`` set to ``feed``.
        """
        # NOTE(review): 'contributors', 'cloud', 'image' and 'textinput' are
        # forwarded unchanged; they look like structured values rather than
        # strings -- verify they are acceptable to PyRSS2Gen when the feed
        # is serialized.
        rss_feed = PyRSS2Gen.RSS2(
            title=feed.get('title'),
            link=feed.get('link'),
            description=feed.get('subtitle'),

            language=feed.get('language'),
            copyright=feed.get('rights'),
            managingEditor=feed.get('contributors'),
            webMaster=feed.get('publisher'),
            pubDate=self.date_tuple_to_datetime(feed.get('published_parsed')),
            lastBuildDate=self.date_tuple_to_datetime(feed.get('updated_parsed')),

            categories=self.get_terms(feed.get('tags')),
            generator=feed.get('generator'),
            docs=feed.get('docs'),
            cloud=feed.get('cloud'),
            ttl=feed.get('ttl'),

            image=feed.get('image'),
            rating=None,
            textInput=feed.get('textinput'),
            skipHours=None,
            skipDays=None,

            items=[self.get_rss_item_for_entry(entry) for entry in entries],
        )

        # Keep the original feed metadata reachable from the generated channel.
        rss_feed.source_entity = feed
        return rss_feed

    def parse(self, feed):
        """Parse ``feed`` with ``feedparser.parse`` and return it as an ``RSS2`` object.

        :param feed: passed straight through to ``feedparser.parse``.
        :return: the ``PyRSS2Gen.RSS2`` built from the parsed feed and entries.
        """
        parsed_feed = feedparser.parse(feed)
        return self.get_rss2_from_feed(parsed_feed.feed, parsed_feed.entries)