summaryrefslogtreecommitdiff
path: root/bandict/__init__.py
blob: 389ae0ecbd8ce3ae8791f511582baf911c6178ba (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
import urllib.request
import json
import numpy as np
from banapedia.wapi.WikipediaQuery import BlockQuery
from banapedia.Ban import *


class BanList():
    """Collection of bans loaded from a JSON file or fetched through BlockQuery.

    Attributes:
        dict_list: the raw ban records, one dict per ban, each carrying at
            least the "user", "timestamp" and "expiry" keys.
        ban_list: one ``Ban`` object built from each entry of ``dict_list``.
    """

    def __init__(self, data_file, samples=30000, samples_by_query=500,
                 from_internet=False):
        """Load the ban records and build the matching ``Ban`` objects.

        Args:
            data_file: path of a JSON file holding a list of ban records.
                Only read when ``from_internet`` is false.
            samples: total number of bans to fetch when ``from_internet``
                is true.
            samples_by_query: maximum number of bans requested per query
                when ``from_internet`` is true.
            from_internet: fetch the records online instead of reading
                ``data_file``.
        """
        if from_internet:
            # Route urllib through the proxies reported by the environment.
            # install_opener() is process-wide, not limited to this object.
            proxy = urllib.request.ProxyHandler(urllib.request.getproxies())
            opener = urllib.request.build_opener(proxy)
            urllib.request.install_opener(opener)
            self.dict_list = self.fetch_multipart(samples, samples_by_query)
        else:
            with open(data_file, "r", encoding="utf-8") as ban_dict_file:
                self.dict_list = json.load(ban_dict_file)

        # Both timestamps are parsed with the ISO_TIMESTAMP format.
        self.ban_list = [
            Ban(
                ban_dict["user"],
                datetime.strptime(ban_dict["timestamp"], ISO_TIMESTAMP),
                datetime.strptime(ban_dict["expiry"], ISO_TIMESTAMP),
            )
            for ban_dict in self.dict_list
        ]

    @staticmethod
    def _next_continue_token(results):
        """Return the ``bkcontinue`` token of a query result, or None.

        The legacy layout (``results["query-continue"]["blocks"]``) is
        checked first, then the ``results["continue"]`` layout. None means
        the result carries no continuation, i.e. there is nothing left to
        fetch.
        """
        legacy = results.get("query-continue", {}).get("blocks", {})
        if "bkcontinue" in legacy:
            return legacy["bkcontinue"]
        return results.get("continue", {}).get("bkcontinue")

    def fetch_multipart(self, n, query_limit):
        """Fetch up to ``n`` ban records, ``query_limit`` at most per query.

        Queries are chained with the continuation token of the previous
        result. Fetching stops early, without error, when a query returns
        no blocks or no continuation token, so fewer than ``n`` records may
        be returned.

        Args:
            n: number of ban records wanted.
            query_limit: maximum number of records requested per query.

        Returns:
            The list of raw ban dicts collected from every query.
        """
        ban_dict_list = []
        continue_token = None

        print("[INFO]", "Fetching %d bans" % n)
        while len(ban_dict_list) < n:
            to_fetch = min(query_limit, n - len(ban_dict_list))
            query = BlockQuery(
                bkprop=["user", "timestamp", "expiry"],
                bkshow=["temp", "ip"],
                limit=to_fetch,
                continue_token=continue_token,
            )
            results = query.fetch_result()
            blocks = results["query"]["blocks"]
            ban_dict_list.extend(blocks)
            # Report what was actually received, not what was requested.
            print("[INFO]", "Fetched %d over %d bans" % (len(ban_dict_list), n))

            continue_token = self._next_continue_token(results)
            if not blocks or continue_token is None:
                # Last page reached (or empty page): asking again would
                # either fail or loop forever.
                break

        print("[INFO]", "Bans fetching complete")
        return ban_dict_list

    def write_to_file(self, outfile):
        """Dump the raw ban records to ``outfile`` as tab-indented JSON."""
        with open(outfile, "w") as ban_dict_file:
            json.dump(self.dict_list, ban_dict_file, indent="\t")

    def get_durations(self):
        """Return ``get_duration()`` of every ban, in ``ban_list`` order."""
        return [ban.get_duration() for ban in self.ban_list]

    def get_countries(self):
        """Return ``get_country_code()`` of every ban, in ``ban_list`` order."""
        return [ban.get_country_code() for ban in self.ban_list]

    def __iter__(self):
        """Iterate over the raw ban dicts (not the ``Ban`` objects)."""
        return self.dict_list.__iter__()

    def items(self):
        """Return ``dict_list.items()``.

        NOTE(review): ``dict_list`` is handled as a list of dicts everywhere
        else in this class, and a list has no ``items()``; this call looks
        like it always raises AttributeError. Intended behaviour to be
        confirmed before changing it.
        """
        return self.dict_list.items()

    def by_country(self):
        """Group the ``Ban`` objects by country code.

        Returns:
            A dict mapping each value returned by ``get_country_code()`` to
            the list of bans having it, in ``ban_list`` order.
        """
        bans_by_country = {}
        for ban in self.ban_list:
            bans_by_country.setdefault(ban.get_country_code(), []).append(ban)
        return bans_by_country

    def average_ban_by_country(self):
        """Return the mean ban duration of each country, divided by 30.

        The division converts the mean to months, presumably because
        ``get_duration()`` is expressed in days -- TODO confirm.
        """
        return {
            country: np.mean([ban.get_duration() for ban in bans]) / 30
            for country, bans in self.by_country().items()
        }