home · contact · privacy
02a7ac9b30b8cd0375a79a49468e2dc5c19e3d77
[berlin-corona-table] / enhance_table.py
#!/usr/bin/env python3
"""Enhance a table of Berlin's daily Corona infections with weekly stats.

Usage: enhance_table.py INFECTIONS_TABLE_PATH OUTPUT_TYPE

The infections table is a whitespace-separated text file.  Its first line
names the districts; every further line holds an ISO date followed by one
integer of new infections per district.  The last district column is treated
as the sum of all the others.

OUTPUT_TYPE 'html' prints a table optimized for web browser viewing, 'txt'
prints one optimized for in-terminal curl.  Any other value prints nothing
(the table is still validated).
"""

import datetime
import sys

# District population numbers as per Wikipedia.
district_pops = {
    'CW': 342332,
    'FK': 289762,
    'Li': 291452,
    'MH': 268548,
    'Mi': 384172,
    'Ne': 329691,
    'Pa': 407765,
    'Re': 265225,
    'Sp': 243977,
    'SZ': 308697,
    'TS': 351644,
    'TK': 271153,
    'sum': 3754418,
}

# Explanation printed above the 'txt' output table.
_TXT_INTRO = \
"""Table of Berlin's Corona infection number development by districts.
Updated daily around 9pm.

Abbrevations/explanations:

CW: Charlottenburg-Wilmersdorf
FK: Friedrichshain-Kreuzberg
Li: Lichtenberg
MH: Marzahn-Hellersdorf
Mi: Mitte
Ne: Neukölln
Pa: Pankow
Re: Reinickendorf
Sp: Spandau
SZ: Steglitz-Zehlendorf
TS: Tempelhof-Schöneberg
TK: Treptow-Köpenick
sum: sum for all the districts
wsum: sum for last 7 days
wavg: per-day average of new infections for last 7 days
winc: incidence (x per 100k inhabitants) of new infections for last 7 days

Source code: https://plomlompom.com/repos/?p=berlin-corona-table
"""


class TableError(Exception):
    """Raised when the infections table is malformed or inconsistent."""


def _read_lines(path):
    """Return the lines of the text file at path, closing it in any case."""
    with open(path, 'r') as table_file:
        return table_file.readlines()


def _parse_table(lines):
    """Validate the table lines and seed the DB with daily new infections.

    Returns a tuple (db, sorted_districts, sorted_dates): db maps district
    name -> date string -> {'new_infections': int}; sorted_districts keeps
    the header's column order; sorted_dates is sorted ascending.

    Raises TableError on an invalid header, a wrong field count, a bad ISO
    date or a non-integer value.  Line numbers in the messages count data
    lines, i.e. 1 is the first line after the header.
    """
    # An empty file has no header at all; treat it like any other bad header
    # instead of failing with an IndexError.
    header_elements = lines[0].split() if lines else []
    # The length check catches duplicate district names, which the set
    # comparison alone would let through.
    if set(header_elements) != set(district_pops) or \
            len(header_elements) != len(district_pops):
        raise TableError('infections table: invalid header')

    sorted_districts = list(header_elements)
    db = {district: {} for district in sorted_districts}
    sorted_dates = []
    for line_count, line in enumerate(lines[1:], start=1):
        fields = line.split()
        if len(fields) - 1 != len(sorted_districts):
            raise TableError('infections table: wrong number of elements '
                             'on line %s' % line_count)
        date = fields[0]
        try:
            datetime.date.fromisoformat(date)
        except ValueError as err:
            raise TableError('infections table: bad ISO date on line %s'
                             % line_count) from err
        for district, field in zip(sorted_districts, fields[1:]):
            try:
                new_infections = int(field)
            except ValueError as err:
                raise TableError('infections table: bad value on line %s'
                                 % line_count) from err
            db[district][date] = {'new_infections': new_infections}
        sorted_dates.append(date)
    sorted_dates.sort()
    return db, sorted_districts, sorted_dates


def _check_sums(db, sorted_districts, sorted_dates):
    """Fail on any day whose "sum" column is not the sum of the districts.

    In LaGeSo's data, the last "district" is actually the sum of all
    districts / the whole of Berlin.  Sometimes LaGeSo sends data that is
    troubled in this way.  It will then have to be fixed manually in the
    table file, since a human should look at what mistake was probably made.

    Raises TableError naming the first offending date.
    """
    sum_district = sorted_districts[-1]
    for date in sorted_dates:
        day_sum = sum(db[district][date]['new_infections']
                      for district in sorted_districts[:-1])
        if day_sum != db[sum_district][date]['new_infections']:
            raise TableError('Questionable district infection sum in %s'
                             % date)


def _add_weekly_stats(db, sorted_districts, sorted_dates):
    """Add 'week_sum', 'week_average', 'week_incidence' to the DB in place.

    Days with less than 6 predecessors are left untouched, since a weekly
    average can only be known with a whole week of data.  The week of a day
    is that day plus the 6 entries before it in sorted_dates.
    """
    for i in range(6, len(sorted_dates)):
        date = sorted_dates[i]
        week_dates = sorted_dates[i - 6:i + 1]
        for district in sorted_districts:
            week_sum = sum(db[district][week_date]['new_infections']
                           for week_date in week_dates)
            day_data = db[district][date]
            day_data['week_sum'] = week_sum
            day_data['week_average'] = week_sum / 7
            day_data['week_incidence'] = \
                (week_sum / district_pops[district]) * 100000


def _print_html(db, sorted_districts, sorted_dates):
    """Print the enhanced table as HTML, newest date on top."""
    print('<html>')
    print('<style>')
    print('table, tr, th, td { border: 1px solid black; }')
    print('</style>')
    print('<table>')
    print('<tr>')
    print('<th>date</th>')
    for district in sorted_districts:
        print('<th>%s</th>' % district)
    print('</tr>')
    for date in reversed(sorted_dates):
        print('<tr>')
        print('<td>%s</td>' % date)
        for district in sorted_districts:
            district_data = db[district][date]
            # Weekly values stay empty for days without a full week of data.
            week_sum = week_avg = week_inc = ''
            new_infections = district_data['new_infections']
            if 'week_sum' in district_data:
                week_sum = '%s' % district_data['week_sum']
            if 'week_average' in district_data:
                week_avg = '%.1f' % district_data['week_average']
            if 'week_incidence' in district_data:
                week_inc = '%.1f' % district_data['week_incidence']
            print('<td>')
            print('<table>')
            print('<tr><th>new</th><td>%s</td></tr>' % new_infections)
            print('<tr><th>wsum</th><td>%s</td></tr>' % week_sum)
            print('<tr><th>wavg</th><td>%s</td></tr>' % week_avg)
            print('<tr><th>winc</th><td>%s</td></tr>' % week_inc)
            print('</table>')
            print('</td>')
        print('</tr>')
    print('</table>')
    print('</html>')


def _print_txt(db, sorted_districts, sorted_dates):
    """Print the explanation and the enhanced table as plain text.

    The table lists the newest date on top and is separated into 7-day
    units, each followed by a summary for the individual districts.
    """
    # Explain what this is.
    print(_TXT_INTRO)

    weekday_count = 0
    sum_district = sorted_districts[-1]
    for date in reversed(sorted_dates):

        # Week table header.
        if weekday_count == 0:
            print(' '*11, '  '.join(sorted_districts[:-1]),
                  sorted_districts[-1], 'wsum', ' wavg', 'winc')
            week_start_date = date

        # Day data line.
        new_infections = [db[district][date]['new_infections']
                          for district in sorted_districts]
        week_sum = week_avg = week_inc = ''
        sum_district_data = db[sum_district][date]
        if 'week_sum' in sum_district_data:
            week_sum = '%4s' % sum_district_data['week_sum']
        if 'week_average' in sum_district_data:
            week_avg = '%5.1f' % sum_district_data['week_average']
        if 'week_incidence' in sum_district_data:
            week_inc = '%4.1f' % sum_district_data['week_incidence']
        print(date, ' '.join(['%3s' % infections
                              for infections in new_infections]),
              week_sum, week_avg, week_inc)

        # Maintain 7-day cycle.
        weekday_count += 1
        if weekday_count != 7:
            continue
        weekday_count = 0

        # After each 7 days, print summary for individual districts.  The
        # newest day of a complete 7-day unit always has at least 6
        # predecessors, so its weekly values are guaranteed to exist.
        week_data = [db[district][week_start_date]
                     for district in sorted_districts[:-1]]
        weekly_sums = [data['week_sum'] for data in week_data]
        weekly_avgs = [data['week_average'] for data in week_data]
        weekly_incs = [data['week_incidence'] for data in week_data]
        print()
        print('district stats for week from %s to %s:'
              % (date, week_start_date))
        print(' '*7, '    '.join(sorted_districts[:-1]))
        print('wsum', ' '.join(['%5.1f' % wsum for wsum in weekly_sums]))
        print('wavg', ' '.join(['%5.1f' % wavg for wavg in weekly_avgs]))
        print('winc', ' '.join(['%5.1f' % winc for winc in weekly_incs]))
        print()


def main(argv=None):
    """Read the infections table named in argv and print the enhanced table.

    argv defaults to sys.argv and must hold exactly the program name, the
    infections table file path and the output type; otherwise a usage message
    is printed and the process exits with status 1.

    Raises TableError if the table is malformed or its sums are inconsistent.
    """
    if argv is None:
        argv = sys.argv

    # Read infections table path and output type.
    if len(argv) != 3:
        print('Expecting infections table file path and output type as only '
              'arguments.')
        sys.exit(1)
    infections_table = argv[1]
    output_type = argv[2]

    lines = _read_lines(infections_table)
    db, sorted_districts, sorted_dates = _parse_table(lines)
    _check_sums(db, sorted_districts, sorted_dates)
    _add_weekly_stats(db, sorted_districts, sorted_dates)

    # Any other output type deliberately prints nothing, as before.
    if output_type == 'html':
        _print_html(db, sorted_districts, sorted_dates)
    elif output_type == 'txt':
        _print_txt(db, sorted_districts, sorted_dates)


if __name__ == '__main__':
    main()