home · contact · privacy
Use long form district names in HTML variant.
[berlin-corona-table] / enhance_table.py
1 #!/usr/bin/env python3
2
# Inhabitants per district (figures taken from Wikipedia), keyed by the
# district abbreviation; the 'sum' entry is the figure for all of Berlin.
district_pops = dict(
    CW=342332,
    FK=289762,
    Li=291452,
    MH=268548,
    Mi=384172,
    Ne=329691,
    Pa=407765,
    Re=265225,
    Sp=243977,
    SZ=308697,
    TS=351644,
    TK=271153,
    sum=3754418,
)
19
# Long-form display name for each district abbreviation.  The order of the
# pairs is the order in which the abbreviations get listed when iterating.
translate = dict((
    ('CW', 'Charlottenburg-Wilmersdorf'),
    ('FK', 'Friedrichshain-Kreuzberg'),
    ('Li', 'Lichtenberg'),
    ('MH', 'Marzahn-Hellersdorf'),
    ('Mi', 'Mitte'),
    ('Ne', 'Neukölln'),
    ('Pa', 'Pankow'),
    ('Re', 'Reinickendorf'),
    ('Sp', 'Spandau'),
    ('SZ', 'Steglitz-Zehlendorf'),
    ('TS', 'Tempelhof-Schöneberg'),
    ('TK', 'Treptow-Köpenick'),
    ('sum', 'all of Berlin'),
))
36
# Read infections table path and output type from the command line.
import sys
if len(sys.argv) != 3:
    # Usage errors go to stderr so they cannot end up inside redirected table
    # output.  sys.exit() is used instead of the exit() helper, which is only
    # injected by the site module for interactive use.
    print('Expecting infections table file path and output type as only arguments.',
          file=sys.stderr)
    sys.exit(1)
infections_table = sys.argv[1]
output_type = sys.argv[2]
44
# Read infections table file lines.  The with-block closes the file even if
# reading raises.
with open(infections_table, 'r') as f:
    lines = f.readlines()
49
# Basic input validation.  The header must name exactly the known districts,
# each of them once; every following line must consist of an ISO date plus one
# integer per header column.  Any violation raises an Exception.
import datetime
if not lines:
    raise Exception('infections table: empty file')
header_elements = lines[0].split()
# The length comparison catches duplicated column names, which the set
# comparison alone would not notice.
if set(header_elements) != district_pops.keys() or \
        len(header_elements) != len(district_pops.keys()):
    raise Exception('infections table: invalid header')
# Line numbers in the messages count data lines only (header excluded).
for line_count, line in enumerate(lines[1:], start=1):
    fields = line.split()
    # Checked first, so that fields[0] below is known to exist.
    if len(header_elements) != len(fields) - 1:
        raise Exception('infections table: wrong number of elements on line %s'
                        % line_count)
    try:
        datetime.date.fromisoformat(fields[0])
    except ValueError as err:
        raise Exception('infections table: bad ISO date on line %s'
                        % line_count) from err
    for field in fields[1:]:
        try:
            int(field)
        except ValueError as err:
            raise Exception('infections table: bad value on line %s'
                            % line_count) from err
74
# The first table line fixes both the names of the districts and their column
# order; every district starts out with an empty per-date record.
sorted_districts = lines[0].split()
db = {district_name: {} for district_name in sorted_districts}
81
# Fill the DB with each district's new infections for every date of the table.
sorted_dates = []
for row in lines[1:]:
    fields = row.split()
    date = fields[0]
    sorted_dates.append(date)
    # Columns after the date follow the district order given by the header.
    for district, district_data in zip(sorted_districts, fields[1:]):
        db[district][date] = {'new_infections': int(district_data)}
sorted_dates.sort()
93
# In LaGeSo's data, the last "district" is actually the total for the whole of
# Berlin.
#
# Refuse to continue if, on any day, that total differs from what the
# individual districts' new infections add up to.  LaGeSo does occasionally
# publish data that is inconsistent in this way.  It then has to be corrected
# by hand in the table file, so that a human gets to judge what mistake was
# probably made.
sum_district = sorted_districts[-1]
for date in sorted_dates:
    reported = db[sum_district][date]['new_infections']
    counted = sum(db[district][date]['new_infections']
                  for district in sorted_districts[:-1])
    if counted != reported:
        raise Exception('Questionable district infection sum in %s' % date)
109
# Add weekly sum, per-day average and incidence per 100000 inhabitants to
# every day that completes a 7-day window.  The first six days are left
# untouched: without six predecessors there is no whole week of data.
for i in range(6, len(sorted_dates)):
    date = sorted_dates[i]
    # The day itself plus its six predecessors.
    week_dates = sorted_dates[i - 6:i + 1]
    for district in sorted_districts:
        per_date = db[district]
        week_sum = sum(per_date[week_date]['new_infections']
                       for week_date in week_dates)
        per_date[date].update({
            'week_sum': week_sum,
            'week_average': week_sum / 7,
            'week_incidence': (week_sum / district_pops[district]) * 100000,
        })
128
# Optimized for web browser viewing.
if output_type == 'html':
    header_cell = ('<th style="writing-mode: vertical-rl; '
                   'transform: rotate(180deg);">%s</th>')
    stat_row = '<tr><th>%s</th><td>%s</td></tr>'
    # (row label, DB key, number format) of each weekly statistic.  Days
    # without a full week of data lack these keys and get an empty cell.
    weekly_stats = (
        ('wsum', 'week_sum', '%s'),
        ('wavg', 'week_average', '%.1f'),
        ('winc', 'week_incidence', '%.1f'),
    )

    # Document start and header row: one rotated cell per district, labelled
    # with the district's long-form name.
    print('<html>',
          '<style>',
          'table, tr, th, td { border: 1px solid black; }',
          '</style>',
          '<table>',
          '<tr>',
          '<th>date</th>',
          sep='\n')
    for district in sorted_districts:
        print(header_cell % translate[district])
    print('</tr>')

    # One row per date, newest on top; each district cell holds a small
    # nested table with the day's numbers.
    sorted_dates.reverse()
    for date in sorted_dates:
        print('<tr>', '<td>%s</td>' % date, sep='\n')
        for district in sorted_districts:
            district_data = db[district][date]
            print('<td>', '<table>', sep='\n')
            print(stat_row % ('new', district_data['new_infections']))
            for label, key, fmt in weekly_stats:
                shown = fmt % district_data[key] if key in district_data else ''
                print(stat_row % (label, shown))
            print('</table>', '</td>', sep='\n')
        print('</tr>')
    print('</table>', '</html>', sep='\n')
168
169 # Optimized for in-terminal curl.
170 elif output_type == 'txt':
171
172     # Explain what this is.
173     intro = \
174 """Table of Berlin's Corona infection number development by districts.
175 Updated daily around 9pm.
176
177 Abbrevations/explanations:
178 """
179     for k in translate:
180         intro += "%s: %s\n" % (k, translate[k])
181     intro += """wsum: sum for last 7 days
182 wavg: per-day average of new infections for last 7 days
183 winc: incidence (x per 100k inhabitants) of new infections for last 7 days
184
185 Source code: https://plomlompom.com/repos/?p=berlin-corona-table
186 """
187     print(intro)
188
189     # Output table of enhanced daily infection data, newest on top,
190     # separated into 7-day units.
191     sorted_dates.reverse()
192     weekday_count = 0
193     sum_district = sorted_districts[-1]
194     for date in sorted_dates:
195
196         # Week table header.
197         if weekday_count == 0:
198             print(' '*11, '  '.join(sorted_districts[:-1]),
199                   sorted_districts[-1], 'wsum', ' wavg', 'winc')
200             week_start_date = date
201
202         # Day data line.
203         new_infections = []
204         for district in sorted_districts:
205             new_infections += [db[district][date]['new_infections']]
206         week_sum = week_avg = week_inc = ''
207         sum_district_data = db[sum_district][date]
208         if 'week_sum' in sum_district_data:
209             week_sum = '%4s' % sum_district_data['week_sum']
210         if 'week_average' in sum_district_data:
211             week_avg = '%5.1f' % sum_district_data['week_average']
212         if 'week_incidence' in sum_district_data:
213             week_inc = '%4.1f' % sum_district_data['week_incidence']
214         print(date, ' '.join(['%3s' % infections
215                               for infections in new_infections]),
216               week_sum, week_avg, week_inc)
217
218         # Maintain 7-day cycle.
219         weekday_count += 1
220         if weekday_count != 7:
221             continue
222         weekday_count = 0
223
224         # After each 7 days, print summary for individual districts.
225         weekly_sums = []
226         weekly_avgs = []
227         weekly_incs = []
228         for district in sorted_districts[:-1]:
229             weekly_sums += [db[district][week_start_date]['week_sum']]
230             weekly_avgs += [db[district][week_start_date]['week_average']]
231             weekly_incs += [db[district][week_start_date]['week_incidence']]
232         print()
233         print('district stats for week from %s to %s:' % (date, week_start_date))
234         print(' '*7, '    '.join(sorted_districts[:-1]))
235         print('wsum', ' '.join(['%5.1f' % wsum for wsum in weekly_sums]))
236         print('wavg', ' '.join(['%5.1f' % wavg for wavg in weekly_avgs]))
237         print('winc', ' '.join(['%5.1f' % winc for winc in weekly_incs]))
238         print()