plomlompom.com Git - berlin-corona-table/blob - enhance_table.py

#!/usr/bin/env python3
   2
   3 # District population numbers as per Wikipedia.
   4 district_pops = {
   5   'CW': 342332,
   6   'FK': 289762,
   7   'Li': 291452,
   8   'MH': 268548,
   9   'Mi': 384172,
  10   'Ne': 329691,
  11   'Pa': 407765,
  12   'Re': 265225,
  13   'Sp': 243977,
  14   'SZ': 308697,
  15   'TS': 351644,
  16   'TK': 271153,
  17   'sum': 3754418,
  18 }
  19
  20 # Map abbreviations to full names.
  21 translate = {
  22   'CW': 'Charlottenburg-Wilmersdorf',
  23   'FK': 'Friedrichshain-Kreuzberg',
  24   'Li': 'Lichtenberg',
  25   'MH': 'Marzahn-Hellersdorf',
  26   'Mi': 'Mitte',
  27   'Ne': 'Neukölln',
  28   'Pa': 'Pankow',
  29   'Re': 'Reinickendorf',
  30   'Sp': 'Spandau',
  31   'SZ': 'Steglitz-Zehlendorf',
  32   'TS': 'Tempelhof-Schöneberg',
  33   'TK': 'Treptow-Köpenick',
  34   'sum': 'all of Berlin',
  35   'wsum': 'sum for last 7 days',
  36   'wavg': 'per-day average of new infections for last 7 days',
  37   'winc': 'incidence (x per 100k inhabitants) of new infections for last 7 days',
  38 }
  39
  40 # Read infections table path and output type.
  41 import sys
  42 if len(sys.argv) != 3:
  43     print('Expecting infections table file path and output type as only arguments.')
  44     exit(1)
  45 infections_table = sys.argv[1]
  46 output_type = sys.argv[2]
  47
  48 # Read infections table file lines.
  49 f = open(infections_table, 'r')
  50 lines = f.readlines()
  51 f.close()
  52
  53 # Basic input validation.
  54 import datetime
  55 header_elements = lines[0].split()
  56 if set(header_elements) != district_pops.keys() or \
  57        len(header_elements) != len(district_pops.keys()):
  58     raise Exception('infections table: invalid header')
  59 line_count = 0
  60 for line in lines[1:]:
  61     line_count += 1
  62     fields = line.split()
  63     if len(header_elements) != len(fields) - 1:
  64         raise Exception('infections table: too many elements on line %s',
  65                         line_count)
  66     try:
  67         datetime.date.fromisoformat(fields[0])
  68     except ValueError:
  69         raise Exception('infections table: bad ISO date on line %s',
  70                         line_count)
  71     for field in fields[1:]:
  72         try:
  73             int(field)
  74         except ValueError:
  75             raise Exception('infections table: bad value on line %s',
  76                             line_count)
  77
  78 # Parse first table file line for the names and order of districts.
  79 db = {}
  80 sorted_districts = []
  81 for header in lines[0].split():
  82     sorted_districts += [header]
  83     db[header] = {}
  84
  85 # Seed DB with daily new infections data per district, per date.
  86 sorted_dates = []
  87 for line in lines[1:]:
  88     fields = line.split()
  89     date = fields[0]
  90     sorted_dates += [date]
  91     for i in range(len(sorted_districts)):
  92         district = sorted_districts[i]
  93         district_data = fields[i + 1]
  94         db[district][date] = {'new_infections': int(district_data)}
  95 sorted_dates.sort()
  96
  97 # In LaGeSo's data, the last "district" is actually the sum of all districts /
  98 # the whole of Berlin.
  99 #
 100 # Fail on any day where the "sum" district's new infections are not the proper
 101 # sum of the individual districts new infections.  Yes, sometimes Lageso sends
 102 # data that is troubled in this way.  It will then have to be fixed manually in
 103 # the table file, since we should have a human look at what mistake was
 104 # probably made.
 105 for date in sorted_dates:
 106     sum_district = sorted_districts[-1]
 107     day_sum = 0
 108     for district in sorted_districts[:-1]:
 109         day_sum += db[district][date]['new_infections']
 110     if day_sum != db[sum_district][date]['new_infections']:
 111         raise Exception('Questionable district infection sum in %s' % date)
 112
 113 # Enhance DB with data about weekly sums, averages, incidences per day.  Ignore
 114 # days that have less than 6 predecessors (we can only know a weekly average if
 115 # we have a whole week of data).
 116 for i in range(len(sorted_dates)):
 117     if i < 6:
 118         continue
 119     date = sorted_dates[i]
 120     week_dates = []
 121     for j in range(7):
 122         week_dates += [sorted_dates[i - j]]
 123     for district in sorted_districts:
 124         district_pop = district_pops[district]
 125         week_sum = 0
 126         for week_date in week_dates:
 127             week_sum += db[district][week_date]['new_infections']
 128         db[district][date]['week_sum'] = week_sum
 129         db[district][date]['week_average'] = week_sum / 7
 130         db[district][date]['week_incidence'] = (week_sum / district_pop) * 100000
 131
 132 # Optimized for web browser viewing.
 133 if output_type == 'html':
 134     print("""<!DOCTYPE html>
 135 <html>
 136 <head>
 137 <style>
 138 table, tr, th, td { border: 1px solid black; }
 139 .day_row:nth-child(7n+2) { background-color: yellow; }
 140 .district_name { writing-mode: vertical-rl; transform: rotate(180deg); }
 141 </style>
 142 <title>Table of Berlin's Corona infection number development by districts</title>
 143 </head>
 144 <h1>Table of Berlin's Corona infection number development by districts</h1>
 145 <p>Updated daily at 9pm. <a href="https://plomlompom.com/repos/?p=berlin-corona-table">Source code</a>. <a href="berlin_corona.txt">Text view optimized for terminal curl</a>.</p>
 146 <table>
 147 <tr>
 148 <th>date</th>""")
 149     sorted_dates.reverse()
 150     sum_district = sorted_districts[-1]
 151     for district in sorted_districts:
 152         long_form = translate[district]
 153         if sum_district == district:
 154             print('<th>%s</th>' % long_form)
 155         else:
 156             print('<th class="district_name">%s</th>' % long_form)
 157     print('</tr>')
 158     for date in sorted_dates:
 159         print('<tr class="day_row">')
 160         print('<td>%s</td>' % date)
 161         long_wsum = translate['wsum']
 162         long_wavg = translate['wavg']
 163         long_winc = translate['winc']
 164         for district in sorted_districts:
 165             district_data = db[district][date]
 166             week_sum = week_avg = week_inc = '(not enough data)'
 167             new_infections = district_data['new_infections']
 168             if 'week_sum' in district_data:
 169                 week_sum = '%s' % district_data['week_sum']
 170             if 'week_average' in district_data:
 171                 week_avg = '%.1f' % district_data['week_average']
 172             if 'week_incidence' in district_data:
 173                 week_inc = '%.1f' % district_data['week_incidence']
 174             print('<td>')
 175             print(new_infections)
 176             if district != sum_district:
 177                 print('<details><summary></summary>')
 178             print('<table>')
 179             print('<tr><th>%s</th><td>%s</td></tr>' % (long_wsum, week_sum))
 180             print('<tr><th>%s</th><td>%s</td></tr>' % (long_wavg, week_avg))
 181             print('<tr><th>%s</th><td>%s</td></tr>' % (long_winc, week_inc))
 182             print('</table>')
 183             if district != sum_district:
 184                 print('</details>')
 185             print('</td>')
 186         print('</tr>')
 187     print('</table>')
 188     print('</html>')
 189
 190 # Optimized for in-terminal curl.
 191 elif output_type == 'txt':
 192
 193     # Explain what this is.
 194     intro = \
 195 """Table of Berlin's Corona infection number development by districts.
 196 Updated daily at 9pm.
 197
 198 Abbrevations/explanations:
 199 """
 200     for k in translate:
 201         intro += "%s: %s\n" % (k, translate[k])
 202     intro += """
 203 Source code: https://plomlompom.com/repos/?p=berlin-corona-table
 204
 205 HTML view: https://plomlompom.com/berlin_corona.html
 206 """
 207     print(intro)
 208
 209     # Output table of enhanced daily infection data, newest on top,
 210     # separated into 7-day units.
 211     sorted_dates.reverse()
 212     weekday_count = 0
 213     sum_district = sorted_districts[-1]
 214     for date in sorted_dates:
 215
 216         # Week table header.
 217         if weekday_count == 0:
 218             print(' '*11, '  '.join(sorted_districts[:-1]),
 219                   sorted_districts[-1], 'wsum', ' wavg', 'winc')
 220             week_start_date = date
 221
 222         # Day data line.
 223         new_infections = []
 224         for district in sorted_districts:
 225             new_infections += [db[district][date]['new_infections']]
 226         week_sum = week_avg = week_inc = ''
 227         sum_district_data = db[sum_district][date]
 228         if 'week_sum' in sum_district_data:
 229             week_sum = '%4s' % sum_district_data['week_sum']
 230         if 'week_average' in sum_district_data:
 231             week_avg = '%5.1f' % sum_district_data['week_average']
 232         if 'week_incidence' in sum_district_data:
 233             week_inc = '%4.1f' % sum_district_data['week_incidence']
 234         print(date, ' '.join(['%3s' % infections
 235                               for infections in new_infections]),
 236               week_sum, week_avg, week_inc)
 237
 238         # Maintain 7-day cycle.
 239         weekday_count += 1
 240         if weekday_count != 7:
 241             continue
 242         weekday_count = 0
 243
 244         # After each 7 days, print summary for individual districts.
 245         weekly_sums = []
 246         weekly_avgs = []
 247         weekly_incs = []
 248         for district in sorted_districts[:-1]:
 249             weekly_sums += [db[district][week_start_date]['week_sum']]
 250             weekly_avgs += [db[district][week_start_date]['week_average']]
 251             weekly_incs += [db[district][week_start_date]['week_incidence']]
 252         print()
 253         print('district stats for week from %s to %s:' % (date, week_start_date))
 254         print(' '*7, '    '.join(sorted_districts[:-1]))
 255         print('wsum', ' '.join(['%5.1f' % wsum for wsum in weekly_sums]))
 256         print('wavg', ' '.join(['%5.1f' % wavg for wavg in weekly_avgs]))
 257         print('winc', ' '.join(['%5.1f' % winc for winc in weekly_incs]))
 258         print()