home · contact · privacy
Remove redundant whitespace.
[berlin-corona-table] / enhance_table.py
#!/usr/bin/env python3
"""Enhance Berlin's daily Corona infections table with weekly statistics.

Reads the whitespace-separated table in ``daily_infections_table.txt`` (first
line: district abbreviations, with the all-districts sum as last column; every
further line: a date followed by one new-infections count per district), checks
that each day's sum column matches the individual districts, adds 7-day sums,
averages and incidences, and prints an explanatory intro followed by the
enhanced table, newest day first.
"""

# District population numbers as per Wikipedia.
district_pops = {
  'CW': 342332,
  'FK': 289762,
  'Li': 291452,
  'MH': 268548,
  'Mi': 384172,
  'Ne': 329691,
  'Pa': 407765,
  'Re': 265225,
  'Sp': 243977,
  'SZ': 308697,
  'TS': 351644,
  'TK': 271153,
  'sum': 3754418,
}

# Explain what this is.
intro = """Table of Berlin's Corona infection number development by districts.
Updated daily around 9pm.

Abbrevations/explanations:

CW: Charlottenburg-Wilmersdorf
FK: Friedrichshain-Kreuzberg
Li: Lichtenberg
MH: Marzahn-Hellersdorf
Mi: Mitte
Ne: Neukölln
Pa: Pankow
Re: Reinickendorf
Sp: Spandau
SZ: Steglitz-Zehlendorf
TS: Tempelhof-Schöneberg
TK: Treptow-Köpenick
sum: sum for all the districts
wsum: sum for last 7 days
wavg: per-day average of new infections for last 7 days
winc: incidence (x per 100k inhabitants) of new infections for last 7 days

Source code: https://plomlompom.com/repos/?p=berlin-corona-table
"""


def parse_table(lines):
    """Parse the raw table lines into districts, dates and a data store.

    Blank and whitespace-only lines are skipped, so a trailing newline or
    spacer line in the table file does not abort the run.

    Returns a tuple ``(sorted_districts, sorted_dates, db)``:
    ``sorted_districts`` lists the header names in file order (the last one is
    the sum column), ``sorted_dates`` lists the dates in ascending string
    order, and ``db[district][date]`` is ``{'new_infections': int}``.

    Raises ValueError if there is no header line, if a data line has fewer
    columns than the header demands, or if a count is not an integer.
    """
    rows = [line for line in lines if line.strip()]
    if not rows:
        raise ValueError('Table file contains no header line')

    # The first table line names the districts and fixes their order.
    sorted_districts = rows[0].split()
    db = {district: {} for district in sorted_districts}

    # Seed DB with daily new infections data per district, per date.
    sorted_dates = []
    for line in rows[1:]:
        fields = line.split()
        if len(fields) < len(sorted_districts) + 1:
            raise ValueError('Too few columns in table line: %r' % line)
        date = fields[0]
        sorted_dates.append(date)
        # Surplus trailing columns are ignored, as zip() stops at the last
        # known district.
        for district, district_data in zip(sorted_districts, fields[1:]):
            db[district][date] = {'new_infections': int(district_data)}
    sorted_dates.sort()
    return sorted_districts, sorted_dates, db


def check_district_sums(db, sorted_districts, sorted_dates):
    """Fail on any day whose sum column is not the sum of the districts.

    Yes, sometimes Lageso sends data that is troubled in this way.  It will
    then have to be fixed manually in the table file, since we should have a
    human look at what mistake was probably made.

    ``sorted_districts`` must be non-empty; its last entry is the sum column.
    Raises Exception naming the first offending date.
    """
    sum_district = sorted_districts[-1]
    single_districts = sorted_districts[:-1]
    for date in sorted_dates:
        day_sum = sum(db[district][date]['new_infections']
                      for district in single_districts)
        if day_sum != db[sum_district][date]['new_infections']:
            raise Exception('Questionable district infection sum in %s' % date)


def add_weekly_stats(db, sorted_districts, sorted_dates, pops=district_pops):
    """Add 'week_sum', 'week_average', 'week_incidence' to DB entries in place.

    Days that have less than 6 predecessors are left untouched (we can only
    know a weekly average if we have a whole week of data).  ``pops`` maps each
    district name to its population; a district missing from it raises
    KeyError as soon as there is at least one full week to compute.
    """
    for i in range(6, len(sorted_dates)):
        date = sorted_dates[i]
        week_dates = sorted_dates[i - 6:i + 1]
        for district in sorted_districts:
            district_pop = pops[district]
            week_sum = sum(db[district][week_date]['new_infections']
                           for week_date in week_dates)
            day_data = db[district][date]
            day_data['week_sum'] = week_sum
            day_data['week_average'] = week_sum / 7
            day_data['week_incidence'] = (week_sum / district_pop) * 100000


def _day_line(db, sorted_districts, date):
    """Return the table line for one date: counts plus sum-column week data."""
    new_infections = [db[district][date]['new_infections']
                      for district in sorted_districts]
    week_sum = week_avg = week_inc = ''
    sum_district_data = db[sorted_districts[-1]][date]
    if 'week_sum' in sum_district_data:
        week_sum = '%4s' % sum_district_data['week_sum']
    if 'week_average' in sum_district_data:
        week_avg = '%5.1f' % sum_district_data['week_average']
    if 'week_incidence' in sum_district_data:
        week_inc = '%4.1f' % sum_district_data['week_incidence']
    # Joining with single spaces mirrors print()'s default separator, so days
    # without weekly data keep their (empty) trailing columns.
    return ' '.join([date,
                     ' '.join(['%3s' % infections
                               for infections in new_infections]),
                     week_sum, week_avg, week_inc])


def table_lines(db, sorted_districts, sorted_dates):
    """Yield the output table's lines, newest day on top, in 7-day units.

    Every unit starts with a header line.  After each complete unit of 7 days
    a per-district summary follows, using the weekly stats stored for the
    unit's newest day.  ``sorted_dates`` is expected in ascending order and is
    not modified.
    """
    single_districts = sorted_districts[:-1]
    newest_first = sorted_dates[::-1]
    for start in range(0, len(newest_first), 7):
        week = newest_first[start:start + 7]

        # Week table header.
        yield ' '.join([' '*11, '  '.join(single_districts),
                        sorted_districts[-1], 'wsum', ' wavg', 'winc'])

        # Day data lines.
        for date in week:
            yield _day_line(db, sorted_districts, date)

        # Only a complete 7-day unit gets a summary; an incomplete one can
        # only be the oldest, whose first day may lack weekly stats.
        if len(week) != 7:
            continue

        # After each 7 days, summary for individual districts.
        week_start_date = week[0]
        week_end_date = week[-1]
        stats = [db[district][week_start_date]
                 for district in single_districts]
        yield ''
        yield ('district stats for week from %s to %s:'
               % (week_end_date, week_start_date))
        yield ' '.join([' '*7, '    '.join(single_districts)])
        yield ' '.join(['wsum', ' '.join(['%5.1f' % day_data['week_sum']
                                          for day_data in stats])])
        yield ' '.join(['wavg', ' '.join(['%5.1f' % day_data['week_average']
                                          for day_data in stats])])
        yield ' '.join(['winc', ' '.join(['%5.1f' % day_data['week_incidence']
                                          for day_data in stats])])
        yield ''


def main():
    """Read the table file, validate and enhance it, print intro and table."""
    # The context manager closes the file even if reading fails.
    with open('daily_infections_table.txt', 'r') as f:
        lines = f.readlines()
    sorted_districts, sorted_dates, db = parse_table(lines)
    check_district_sums(db, sorted_districts, sorted_dates)
    add_weekly_stats(db, sorted_districts, sorted_dates)
    print(intro)
    for line in table_lines(db, sorted_districts, sorted_dates):
        print(line)


if __name__ == '__main__':
    main()