plomlompom.com Git - berlin-corona-table/blob - enhance_table.py

#!/usr/bin/env python3
   2
   3 # District population numbers as per Wikipedia.
   4 district_pops = {
   5   'CW': 342332,
   6   'FK': 289762,
   7   'Li': 291452,
   8   'MH': 268548,
   9   'Mi': 384172,
  10   'Ne': 329691,
  11   'Pa': 407765,
  12   'Re': 265225,
  13   'Sp': 243977,
  14   'SZ': 308697,
  15   'TS': 351644,
  16   'TK': 271153,
  17   'sum': 3754418,
  18 }
  19
  20 # Map abbreviations to full names.
  21 translate = {
  22   'CW': 'Charlottenburg-Wilmersdorf',
  23   'FK': 'Friedrichshain-Kreuzberg',
  24   'Li': 'Lichtenberg',
  25   'MH': 'Marzahn-Hellersdorf',
  26   'Mi': 'Mitte',
  27   'Ne': 'Neukölln',
  28   'Pa': 'Pankow',
  29   'Re': 'Reinickendorf',
  30   'Sp': 'Spandau',
  31   'SZ': 'Steglitz-Zehlendorf',
  32   'TS': 'Tempelhof-Schöneberg',
  33   'TK': 'Treptow-Köpenick',
  34   'sum': 'all of Berlin',
  35   '+': 'new infections counted that day',
  36   'Σ': 'sum of new infections for last 7 days',
  37   'Ø': 'per-day average of new infections for last 7 days',
  38   'i': 'incidence (x per 100k inhabitants) of new infections for last 7 days',
  39 }
  40
  41 # Read infections table path and output type.
  42 import sys
  43 if len(sys.argv) != 3:
  44     print('Expecting infections table file path and output type as only arguments.')
  45     exit(1)
  46 infections_table = sys.argv[1]
  47 output_type = sys.argv[2]
  48
  49 # Read infections table file lines.
  50 f = open(infections_table, 'r')
  51 lines = f.readlines()
  52 f.close()
  53
  54 # Basic input validation.
  55 import datetime
  56 header_elements = lines[0].split()
  57 if set(header_elements) != district_pops.keys() or \
  58        len(header_elements) != len(district_pops.keys()):
  59     raise Exception('infections table: invalid header')
  60 line_count = 0
  61 for line in lines[1:]:
  62     line_count += 1
  63     fields = line.split()
  64     if len(header_elements) != len(fields) - 1:
  65         raise Exception('infections table: too many elements on line %s',
  66                         line_count)
  67     try:
  68         datetime.date.fromisoformat(fields[0])
  69     except ValueError:
  70         raise Exception('infections table: bad ISO date on line %s',
  71                         line_count)
  72     for field in fields[1:]:
  73         try:
  74             int(field)
  75         except ValueError:
  76             raise Exception('infections table: bad value on line %s',
  77                             line_count)
  78
  79 # Parse first table file line for the names and order of districts.
  80 db = {}
  81 sorted_districts = []
  82 for header in lines[0].split():
  83     sorted_districts += [header]
  84     db[header] = {}
  85
  86 # Seed DB with daily new infections data per district, per date.
  87 sorted_dates = []
  88 for line in lines[1:]:
  89     fields = line.split()
  90     date = fields[0]
  91     sorted_dates += [date]
  92     for i in range(len(sorted_districts)):
  93         district = sorted_districts[i]
  94         district_data = fields[i + 1]
  95         db[district][date] = {'new_infections': int(district_data)}
  96 sorted_dates.sort()
  97
  98 # In LaGeSo's data, the last "district" is actually the sum of all districts /
  99 # the whole of Berlin.  For our district order, move it in front of the other
 100 # districts, as its numbers are the most interesting, so in the table views
 101 # we want to see it first.
 102 sum_district = sorted_districts.pop()
 103 sorted_districts.insert(0, sum_district)
 104
 105 # Fail on any day where the "sum" district's new infections are not the proper
 106 # sum of the individual districts new infections.  Yes, sometimes Lageso sends
 107 # data that is troubled in this way.  It will then have to be fixed manually in
 108 # the table file, since we should have a human look at what mistake was
 109 # probably made.
 110 for date in sorted_dates:
 111     day_sum = 0
 112     for district in [d for d in sorted_districts if not d==sum_district]:
 113         day_sum += db[district][date]['new_infections']
 114     if day_sum != db[sum_district][date]['new_infections']:
 115         raise Exception('Questionable district infection sum in %s' % date)
 116
 117 # Enhance DB with data about weekly sums, averages, incidences per day.  Ignore
 118 # days that have less than 6 predecessors (we can only know a weekly average if
 119 # we have a whole week of data).
 120 for i in range(len(sorted_dates)):
 121     if i < 6:
 122         continue
 123     date = sorted_dates[i]
 124     week_dates = []
 125     for j in range(7):
 126         week_dates += [sorted_dates[i - j]]
 127     for district in sorted_districts:
 128         district_pop = district_pops[district]
 129         week_sum = 0
 130         for week_date in week_dates:
 131             week_sum += db[district][week_date]['new_infections']
 132         db[district][date]['week_sum'] = week_sum
 133         db[district][date]['week_average'] = week_sum / 7
 134         db[district][date]['week_incidence'] = (week_sum / district_pop) * 100000
 135
 136 # Optimized for web browser viewing.
 137 import calendar
 138 if output_type == 'html':
 139     print("""<!DOCTYPE html>
 140 <html>
 141 <head>
 142 <style>
 143 th { text-align: left; vertical-align: bottom; }
 144 .vertical_header { writing-mode: vertical-rl; transform: rotate(180deg); font-weight: normal; }
 145 .repeated_head th { padding-top: 0.5em; border-bottom: 1px solid black; }
 146 .bold { font-weight: bold }
 147 .date { vertical-align: top; padding-top: 0.5em; }
 148 </style>
 149 <title>Berlin's Corona infection numbers, development by districts</title>
 150 </head>
 151 <a href="/">home</a> · <a href="/contact.html">contact</a> · <a href="/privacy.html">privacy</a>
 152 <h1>Berlin's Corona infection numbers, development by districts</h1>
 153 <p>Updated daily at 7pm based on data from the "Senatsverwaltung für Gesundheit, Pflege und Gleichstellung". <a href="https://plomlompom.com/repos/?p=berlin-corona-table">Source code</a>. <a href="berlin_corona.txt">Plain text view (optimized for terminal curl)</a>.</p>
 154 <table>
 155 <tr>
 156 <th colspan=2></th>""")
 157     sorted_dates.reverse()
 158     for district in sorted_districts:
 159         # Wrap in div because the vertical orientation otherwise fails
 160         # in Chromium.
 161         print('<th><div class="vertical_header">%s</div></th>' %
 162               translate[district])
 163     print('</tr>')
 164     weekday_count = 0
 165     for date in sorted_dates:
 166         if weekday_count == 0:
 167             print('<tr class="repeated_head">')
 168             print('<th>date</th>')
 169             print('<th><a href="#symbols">?</a></th>')
 170             for district in sorted_districts:
 171                 print('<th><abbr title="%s">%s</abbr></th>' %
 172                       (translate[district], district))
 173             print('</tr>')
 174         print('<tr class="day_row">')
 175         weekday = calendar.day_name[datetime.date.fromisoformat(date).weekday()]
 176         print('<td class="date">%s<br />%s</td>' % (date, weekday))
 177         print('<td><table>')
 178         for abbr in ['+', 'Σ', 'Ø', 'i']:
 179             print('<tr><th><abbr title="%s">%s</abbr></th></tr>' %
 180                   (translate[abbr], abbr))
 181         print('</table></td>')
 182         for district in sorted_districts:
 183             district_data = db[district][date]
 184             week_sum = week_avg = week_inc = '?'
 185             new_infections = district_data['new_infections']
 186             if 'week_sum' in district_data:
 187                 week_sum = '%s' % district_data['week_sum']
 188             if 'week_average' in district_data:
 189                 week_avg = '%.1f' % district_data['week_average']
 190             if 'week_incidence' in district_data:
 191                 week_inc = '%.1f' % district_data['week_incidence']
 192             print('<td>')
 193             print('<table>')
 194             print('<tr><td class="bold">%s</td></tr>' % new_infections)
 195             print('<tr><td>%s</td></tr>' % week_sum)
 196             print('<tr><td>%s</td></tr>' % week_avg)
 197             print('<tr><td>%s</td></tr>' % week_inc)
 198             print('</table>')
 199             print('</td>')
 200         print('</tr>')
 201         weekday_count += 1
 202         if weekday_count != 7:
 203             continue
 204         weekday_count = 0
 205     print('</table>')
 206     print('<h3 id="symbols">Symbols</h3>')
 207     print('<dl>')
 208     for abbr in ['+', 'Σ', 'Ø', 'i']:
 209         print('<dt>%s</dt><dd>%s</dd>' % (abbr, translate[abbr]))
 210     print('</dl>')
 211     print('</html>')
 212
 213 # Optimized for in-terminal curl.
 214 elif output_type == 'txt':
 215
 216     # Explain what this is.
 217     intro = \
 218 """Table of Berlin's Corona infection number development by districts.
 219 Updated daily at 7pm based on data from the "Senatsverwaltung für Gesundheit, Pflege und Gleichstellung".
 220
 221 Abbrevations/explanations:
 222 """
 223     for k in translate:
 224         intro += "%s: %s\n" % (k, translate[k])
 225     intro += """
 226 Source code: https://plomlompom.com/repos/?p=berlin-corona-table
 227 HTML view: https://plomlompom.com/berlin_corona.html"""
 228     print(intro)
 229
 230     # Output table of enhanced daily infection data, newest on top,
 231     # separated into 7-day units.
 232     sorted_dates.reverse()
 233     weekday_count = 0
 234     for date in sorted_dates:
 235
 236         # Week table header.
 237         if weekday_count == 0:
 238             print()
 239             print(' '*13, '   '.join(sorted_districts))
 240             print('-'*77)
 241
 242         # Day table.
 243         weekday = calendar.day_name[datetime.date.fromisoformat(date).weekday()]
 244         print('%s (%s)' % (date, weekday))
 245         new_infections = []
 246         weekly_sum_strings = []
 247         weekly_avg_strings = []
 248         weekly_inc_strings = []
 249         for district in sorted_districts:
 250             district_day_data = db[district][date]
 251             new_infections += [district_day_data['new_infections']]
 252             wsum_string = ' '*3 + '?'
 253             wavg_string = winc_string = ' '*4 + '?'
 254             if 'week_sum' in district_day_data:
 255                 wsum_string = '%4s' % district_day_data['week_sum']
 256             weekly_sum_strings += [wsum_string]
 257             if 'week_average' in district_day_data:
 258                 wavg_string = '%5.1f' % district_day_data['week_average']
 259             weekly_avg_strings += [wavg_string]
 260             if 'week_incidence' in district_day_data:
 261                 winc_string = '%5.1f' % district_day_data['week_incidence']
 262             weekly_inc_strings += [winc_string]
 263         print('+', ' '*11, '  '.join(['%3s' % i for i in new_infections]))
 264         print('Σ', ' '*10, ' '.join(weekly_sum_strings))
 265         print('Ø', ' '*9, ''.join(weekly_avg_strings))
 266         print('i', ' '*9, ''.join(weekly_inc_strings))
 267         weekday_count += 1
 268         if weekday_count != 7:
 269             continue
 270         weekday_count = 0