home · contact · privacy
Replace "not enough data" string for space reasons.
[berlin-corona-table] / enhance_table.py
#!/usr/bin/env python3
2
# Inhabitants per district (figures as per Wikipedia), keyed by district
# abbreviation.  The 'sum' entry covers all of Berlin and equals the total of
# the twelve individual districts.
district_pops = {
    'CW': 342_332,
    'FK': 289_762,
    'Li': 291_452,
    'MH': 268_548,
    'Mi': 384_172,
    'Ne': 329_691,
    'Pa': 407_765,
    'Re': 265_225,
    'Sp': 243_977,
    'SZ': 308_697,
    'TS': 351_644,
    'TK': 271_153,
    'sum': 3_754_418,
}
19
# Human-readable long forms for every abbreviation that shows up in the
# output: the districts, the all-Berlin total, and the weekly statistics.
translate = {
    'CW': 'Charlottenburg-Wilmersdorf',
    'FK': 'Friedrichshain-Kreuzberg',
    'Li': 'Lichtenberg',
    'MH': 'Marzahn-Hellersdorf',
    'Mi': 'Mitte',
    'Ne': 'Neukölln',
    'Pa': 'Pankow',
    'Re': 'Reinickendorf',
    'Sp': 'Spandau',
    'SZ': 'Steglitz-Zehlendorf',
    'TS': 'Tempelhof-Schöneberg',
    'TK': 'Treptow-Köpenick',
    'sum': 'all of Berlin',
    'wsum': 'sum for last 7 days',
    'wavg': 'per-day average of new infections for last 7 days',
    'winc': ('incidence (x per 100k inhabitants) of new infections '
             'for last 7 days'),
}
39
# Read infections table path and output type from the command line.
import sys
if len(sys.argv) != 3:
    # Report usage errors on stderr so they cannot end up in redirected
    # output, and use sys.exit() rather than the exit() helper, which is only
    # present when the site module is loaded.
    print('Expecting infections table file path and output type as only arguments.',
          file=sys.stderr)
    sys.exit(1)
infections_table = sys.argv[1]
output_type = sys.argv[2]

# Read infections table file lines.  The context manager closes the file even
# if reading it fails.
with open(infections_table, 'r') as f:
    lines = f.readlines()
52
# Basic input validation: the header must name exactly the known districts
# (each of them once), and every following line must consist of an ISO date
# followed by one integer per header column.
import datetime
if not lines:
    # Without this check an empty file would fail with a bare IndexError.
    raise Exception('infections table: empty file')
header_elements = lines[0].split()
if set(header_elements) != district_pops.keys() or \
       len(header_elements) != len(district_pops.keys()):
    raise Exception('infections table: invalid header')
# line_count numbers the data lines only, i.e. the header is not counted.
for line_count, line in enumerate(lines[1:], start=1):
    fields = line.split()
    # One date field plus one value per header column is expected; both too
    # many and too few fields end up here.
    if len(header_elements) != len(fields) - 1:
        raise Exception('infections table: wrong number of elements on line %s'
                        % line_count)
    try:
        datetime.date.fromisoformat(fields[0])
    except ValueError as err:
        raise Exception('infections table: bad ISO date on line %s'
                        % line_count) from err
    for field in fields[1:]:
        try:
            int(field)
        except ValueError as err:
            raise Exception('infections table: bad value on line %s'
                            % line_count) from err
77
# The header line determines both the district names and their column order.
# Every district starts out with an empty per-date record in the DB.
sorted_districts = lines[0].split()
db = {district: {} for district in sorted_districts}
84
# Fill the DB with each district's daily new infections, keyed by date, and
# collect all dates in ascending order.
sorted_dates = []
for row in lines[1:]:
    columns = row.split()
    date = columns[0]
    sorted_dates.append(date)
    # Column 0 holds the date, so district values start at column 1.
    for column, district in enumerate(sorted_districts, start=1):
        db[district][date] = {'new_infections': int(columns[column])}
sorted_dates.sort()
96
# In LaGeSo's data, the last "district" is not a real district but the total
# for the whole of Berlin.
#
# Refuse to continue on any day where that total differs from what the
# individual districts' new infections add up to.  LaGeSo does occasionally
# publish data that is inconsistent in this way; the table file then has to be
# corrected by hand, since a human should judge what mistake was probably
# made.
sum_district = sorted_districts[-1]
real_districts = sorted_districts[:-1]
for date in sorted_dates:
    reported_sum = db[sum_district][date]['new_infections']
    day_sum = sum(db[district][date]['new_infections']
                  for district in real_districts)
    if day_sum != reported_sum:
        raise Exception('Questionable district infection sum in %s' % date)
112
# Add 7-day statistics (sum, per-day average, incidence per 100k inhabitants)
# to every day's record.  The first six days are skipped: a weekly figure
# needs the day itself plus six predecessors.
for i in range(6, len(sorted_dates)):
    date = sorted_dates[i]
    # The day itself and the six days before it.
    week_dates = sorted_dates[i - 6:i + 1]
    for district in sorted_districts:
        per_date = db[district]
        week_sum = sum(per_date[week_date]['new_infections']
                       for week_date in week_dates)
        day_record = per_date[date]
        day_record['week_sum'] = week_sum
        day_record['week_average'] = week_sum / 7
        day_record['week_incidence'] = \
            (week_sum / district_pops[district]) * 100000
131
# Optimized for web browser viewing.
if output_type == 'html':
    # Static page head, intro paragraph, and the opening of the first header
    # row.  Two header rows precede the day rows, so the first day row is the
    # table's third child; the 7n+3 selector hence draws a separator line
    # above every seventh day row, starting with the first.
    print("""<!DOCTYPE html>
<html>
<head>
<style>
.day_row:nth-child(7n+3) > td { border-top: 1px solid black; }
.vertical_header { writing-mode: vertical-rl; transform: rotate(180deg); font-weight: normal; }
.fixed_head { position: sticky; top: 0; background-color: white; }
.bold { font-weight: bold }
</style>
<title>Berlin's Corona infection numbers, development by districts</title>
</head>
<a href="/">home</a> · <a href="/contact.html">contact</a> · <a href="/privacy.html">privacy</a>
<h1>Berlin's Corona infection numbers, development by districts</h1>
<p>Updated daily at 9pm based on data from the "Senatsverwaltung für Gesundheit, Pflege und Gleichstellung". <a href="https://plomlompom.com/repos/?p=berlin-corona-table">Source code</a>. <a href="berlin_corona.txt">Text view optimized for terminal curl</a>.</p>
<table>
<tr>
<th colspan=2></th>""")
    # Newest day on top.
    sorted_dates.reverse()
    long_wsum = translate['wsum']
    long_wavg = translate['wavg']
    long_winc = translate['winc']

    # First header row: full district names, written vertically.
    for district in sorted_districts:
        long_form = translate[district]
        # Wrap in div because the vertical orientation otherwise fails
        # in Chromium.
        print('<th><div class="vertical_header">%s</div></th>' % long_form)
    print('</tr>')

    # Second header row: district abbreviations, sticking to the page top.
    print('<tr class="fixed_head">')
    # In Chromium, the th only stay fixed if also given this class.
    print('<th class="fixed_head">date</th>')
    print('<th class="fixed_head"></th>')
    for district in sorted_districts:
        print('<th class="fixed_head">%s</th>' % district)
    print('</tr>')

    # One row per day: the date, a legend cell naming the four values, then
    # one cell per district holding those four values in a nested table.
    for date in sorted_dates:
        print('<tr class="day_row">')
        print('<td>%s</td>' % date)
        print('<td><table>')
        print('<tr><th><abbr title="new">+</abbr></th></tr>')
        print('<tr><th><abbr title="%s">Σ</abbr></th></tr>' % long_wsum)
        print('<tr><th><abbr title="%s">Ø</abbr></th></tr>' % long_wavg)
        print('<tr><th><abbr title="%s">i</abbr></th></tr>' % long_winc)
        print('</table></td>')
        for district in sorted_districts:
            district_data = db[district][date]
            # Days without a full week of history have no weekly figures and
            # show a placeholder instead.
            week_sum = week_avg = week_inc = '?'
            new_infections = district_data['new_infections']
            if 'week_sum' in district_data:
                week_sum = '%s' % district_data['week_sum']
            if 'week_average' in district_data:
                week_avg = '%.1f' % district_data['week_average']
            if 'week_incidence' in district_data:
                week_inc = '%.1f' % district_data['week_incidence']
            print('<td>')
            print('<table>')
            print('<tr><td class="bold">%s</td></tr>' % new_infections)
            print('<tr><td>%s</td></tr>' % week_sum)
            print('<tr><td>%s</td></tr>' % week_avg)
            print('<tr><td>%s</td></tr>' % week_inc)
            print('</table>')
            print('</td>')
        print('</tr>')
    print('</table>')
    print('</html>')

# Optimized for in-terminal curl.
elif output_type == 'txt':

    # Explain what this is.
    intro = \
"""Table of Berlin's Corona infection number development by districts.
Updated daily at 9pm based on data from the "Senatsverwaltung für Gesundheit, Pflege und Gleichstellung".

Abbreviations/explanations:
"""
    for k in translate:
        intro += "%s: %s\n" % (k, translate[k])
    intro += """
Source code: https://plomlompom.com/repos/?p=berlin-corona-table

HTML view: https://plomlompom.com/berlin_corona.html
"""
    print(intro)

    # Output table of enhanced daily infection data, newest on top,
    # separated into 7-day units.
    sorted_dates.reverse()
    weekday_count = 0
    sum_district = sorted_districts[-1]
    for date in sorted_dates:

        # Week table header.  Dates run newest to oldest here, so the date
        # remembered as week_start_date is the newest day of the 7-day unit.
        if weekday_count == 0:
            print(' '*11, '  '.join(sorted_districts[:-1]),
                  sorted_districts[-1], 'wsum', ' wavg', 'winc')
            week_start_date = date

        # Day data line: new infections per district, followed by the weekly
        # figures for all of Berlin (left empty where not available).
        new_infections = [db[district][date]['new_infections']
                          for district in sorted_districts]
        week_sum = week_avg = week_inc = ''
        sum_district_data = db[sum_district][date]
        if 'week_sum' in sum_district_data:
            week_sum = '%4s' % sum_district_data['week_sum']
        if 'week_average' in sum_district_data:
            week_avg = '%5.1f' % sum_district_data['week_average']
        if 'week_incidence' in sum_district_data:
            week_inc = '%4.1f' % sum_district_data['week_incidence']
        print(date, ' '.join(['%3s' % infections
                              for infections in new_infections]),
              week_sum, week_avg, week_inc)

        # Maintain 7-day cycle.
        weekday_count += 1
        if weekday_count != 7:
            continue
        weekday_count = 0

        # After each 7 days, print summary for individual districts, using
        # the weekly figures stored for the newest day of the unit.
        weekly_sums = []
        weekly_avgs = []
        weekly_incs = []
        for district in sorted_districts[:-1]:
            week_data = db[district][week_start_date]
            weekly_sums += [week_data['week_sum']]
            weekly_avgs += [week_data['week_average']]
            weekly_incs += [week_data['week_incidence']]
        print()
        print('district stats for week from %s to %s:' % (date, week_start_date))
        print(' '*7, '    '.join(sorted_districts[:-1]))
        print('wsum', ' '.join(['%5.1f' % wsum for wsum in weekly_sums]))
        print('wavg', ' '.join(['%5.1f' % wavg for wavg in weekly_avgs]))
        print('winc', ' '.join(['%5.1f' % winc for winc in weekly_incs]))
        print()

# Anything else is a usage error; report it instead of silently producing no
# output at all.
else:
    print('Unknown output type "%s", expecting "html" or "txt".' % output_type,
          file=sys.stderr)
    sys.exit(1)