summaryrefslogtreecommitdiff
path: root/main.py
blob: 8187e1f13d61956d70812045eca0f1d3d46be80f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
from banapedia.Ban import *
from collections import Counter
import json
import pygal
import numpy as np
import urllib.request

__author__ = 'pacien'


BAN_MAP_FILE = "output/ban-map.svg"
BAN_DURATION_MAP_FILE = "output/ban-duration-map.svg"
HIST_FILE = "output/histogram.svg"

BAN_FILE = "resources/ban_list.json"

SAMPLES = 30000
SAMPLES_BY_QUERY = 500


def configure_proxy():
    """Install a process-wide urllib opener that routes through the proxies
    reported by ``urllib.request.getproxies()``.

    After this call, ``urllib.request.urlopen`` uses the installed opener.
    """
    detected_proxies = urllib.request.getproxies()
    proxy_handler = urllib.request.ProxyHandler(detected_proxies)
    urllib.request.install_opener(urllib.request.build_opener(proxy_handler))


def load_from_internet():
    """Configure the proxy-aware opener, then fetch the ban records.

    ``SAMPLES`` and ``SAMPLES_BY_QUERY`` are forwarded to
    ``fetch_multipart_ban_dict`` (presumably the total number of records and
    the number requested per query -- confirm against that helper), and its
    result is returned unchanged.
    """
    configure_proxy()
    fetched_ban_dicts = fetch_multipart_ban_dict(SAMPLES, SAMPLES_BY_QUERY)
    return fetched_ban_dicts


def load_from_local():
    """Load the cached ban records from ``BAN_FILE``.

    Returns:
        The deserialized JSON content of the file.

    Raises:
        OSError: if the file cannot be opened.
        json.JSONDecodeError: if the file does not contain valid JSON.
    """
    # JSON text is UTF-8; do not rely on the platform's locale encoding,
    # which would misread non-ASCII content on some systems.
    with open(BAN_FILE, "r", encoding="utf-8") as ban_dict_file:
        return json.load(ban_dict_file)


def write_to_local(ban_dict_list):
    """Store ``ban_dict_list`` in ``BAN_FILE`` as tab-indented JSON.

    Args:
        ban_dict_list: JSON-serializable data to cache.

    Raises:
        TypeError: if the data contains a value ``json`` cannot serialize.
        OSError: if the file cannot be opened for writing.
    """
    # Serialize first: opening with "w" truncates the file immediately, so a
    # serialization error raised afterwards would wipe the existing cache.
    serialized = json.dumps(ban_dict_list, indent="\t")

    with open(BAN_FILE, "w", encoding="utf-8") as ban_dict_file:
        ban_dict_file.write(serialized)


# To refresh the local cache, uncomment the two lines below: they obtain the
# records through load_from_internet() and store them with write_to_local().
# ban_dict_list = load_from_internet()
# write_to_local(ban_dict_list)

# Default path: read the records from the JSON cache at BAN_FILE.
ban_dict_list = load_from_local()

# Convert the raw records with map_bans (not defined in this file; presumably
# provided by the star import from banapedia.Ban).
# NOTE(review): ban_list is iterated several times below, so map_bans must
# return a re-iterable sequence rather than a one-shot iterator -- confirm.
ban_list = map_bans(ban_dict_list)


########## HISTOGRAM ##########

# Collect the duration of every ban. The bin edges below are multiples of
# 365/12, i.e. durations are treated as a number of days.
ban_durations = [ban.get_duration() for ban in ban_list]

# 51 edges placed at 1, 2, ..., 51 months (rounded to whole days) give 50
# one-month-wide bins. np.histogram ignores values outside the outermost
# edges, so bans shorter than 1 month or longer than 51 months are not counted.
# From here on ban_durations holds the per-bin counts, not the raw durations.
(ban_durations, bins) = np.histogram(ban_durations, bins=[round(365/12*x) for x in range(1, 50+2)])

print("[INFO]", "Generating histogram")
bar_chart = pygal.Bar(legend_at_bottom=True)
# NOTE: the title reports the configured SAMPLES constant, not the number of
# bans actually loaded or counted in the bins.
bar_chart.title = "Active Wikipedia bans by duration (%d samples)" % SAMPLES
# In Python 3 ``map`` returns a one-shot iterator; hand the chart a real list
# so the labels can be measured and iterated more than once.
bar_chart.x_labels = [str(month) for month in range(1, len(ban_durations)+1)]
bar_chart.add("Number of active bans", ban_durations)
bar_chart.render_to_file(HIST_FILE)
print("[INFO]", "Histogram generation complete")


########## NB BAN MAP ##########

def count_by_country(ban_list):
    """Count how many bans carry each country code.

    Args:
        ban_list: iterable of objects exposing ``get_country_code()``.

    Returns:
        A ``Counter`` mapping each country code to its number of bans.
    """
    return Counter(entry.get_country_code() for entry in ban_list)

# Counter mapping each country code to its number of bans.
nb_bans_by_country = count_by_country(ban_list)

print("[INFO]", "Generating ban map")
worldmap_chart = pygal.Worldmap(legend_at_bottom=True)
# NOTE: the title reports the configured SAMPLES constant, not len(ban_list).
worldmap_chart.title = "World active Wikipedia bans by country (%d samples)" % SAMPLES
# The Counter is passed directly as the series' {country code: count} mapping.
# NOTE(review): presumably the codes returned by get_country_code() are in the
# form the world map expects -- confirm.
worldmap_chart.add("Active bans", nb_bans_by_country)
worldmap_chart.render_to_file(BAN_MAP_FILE)
print("[INFO]", "Ban map generation complete")


########## BAN DURATION MAP ##########

def group_by_country(ban_list):
    """Group bans by their country code.

    Args:
        ban_list: iterable of objects exposing ``get_country_code()``.

    Returns:
        A plain ``dict`` mapping each country code to the list of bans that
        carry it, in the order they were encountered.
    """
    bans_by_country = {}

    for ban in ban_list:
        # setdefault creates the list the first time a country is seen and
        # returns the existing one afterwards.
        bans_by_country.setdefault(ban.get_country_code(), []).append(ban)

    return bans_by_country


def calc_average_ban_by_country(ban_by_country_dict):
    """Compute the mean ban duration for each country.

    Args:
        ban_by_country_dict: mapping of country code to a list of objects
            exposing ``get_duration()``.

    Returns:
        A dict mapping each country code to the ``np.mean`` of the durations
        of its bans.
    """
    return {
        country_code: np.mean([entry.get_duration() for entry in country_bans])
        for country_code, country_bans in ban_by_country_dict.items()
    }

# Group the bans per country code, then average the durations of each group.
ban_duration_by_country = group_by_country(ban_list)
average_ban_duration_by_country = calc_average_ban_by_country(ban_duration_by_country)

# Convert the averages to months. Use the same month length (365/12 days) as
# the histogram bin edges so both charts agree on what "a month" means; the
# previous divisor of 30 made the two charts slightly inconsistent.
average_ban_duration_by_country = {country: duration/(365/12) for country, duration in average_ban_duration_by_country.items()}

print("[INFO]", "Generating ban duration map")
worldmap_chart = pygal.Worldmap(legend_at_bottom=True)
# NOTE: the title reports the configured SAMPLES constant, not len(ban_list).
worldmap_chart.title = "Average Wikipedia ban duration by country (%d samples)" % SAMPLES
worldmap_chart.add("Average ban duration (months)", average_ban_duration_by_country)
worldmap_chart.render_to_file(BAN_DURATION_MAP_FILE)
print("[INFO]", "Ban duration map generation complete")

print("\nTHIS WAS A TRIUMPH!")
print("I'M MAKING A NOTE HERE:")
print("HUGE [SUCCESS]")