#!//usr/bin/env python3

import datetime
import sys

# District population numbers as per Wikipedia.
district_pops = {
    'CW': 342332,
    'FK': 289762,
    'Li': 291452,
    'MH': 268548,
    'Mi': 384172,
    'Ne': 329691,
    'Pa': 407765,
    'Re': 265225,
    'Sp': 243977,
    'SZ': 308697,
    'TS': 351644,
    'TK': 271153,
    'sum': 3754418,
}

# Map abbreviations to full names.
translate = {
    'CW': 'Charlottenburg-Wilmersdorf',
    'FK': 'Friedrichshain-Kreuzberg',
    'Li': 'Lichtenberg',
    'MH': 'Marzahn-Hellersdorf',
    'Mi': 'Mitte',
    'Ne': 'Neukölln',
    'Pa': 'Pankow',
    'Re': 'Reinickendorf',
    'Sp': 'Spandau',
    'SZ': 'Steglitz-Zehlendorf',
    'TS': 'Tempelhof-Schöneberg',
    'TK': 'Treptow-Köpenick',
    'sum': 'all of Berlin',
    '+': 'new infections counted that day',
    'Σ': 'sum of new infections for last 7 days',
    'Ø': 'per-day average of new infections for last 7 days',
    'i': 'incidence (x per 100k inhabitants) of new infections for last 7 days',
}

# Read infections table path and output type.
if len(sys.argv) != 3:
    print('Expecting infections table file path and output type as only arguments.')
    sys.exit(1)
infections_table = sys.argv[1]
output_type = sys.argv[2]

# Read infections table file lines. The context manager closes the file even
# if reading fails.
with open(infections_table, 'r') as f:
    lines = f.readlines()

# Basic input validation. Every failure raises an Exception whose message
# names the problem and, for data rows, the 1-based index of the row counted
# after the header.
if not lines:
    # An empty file has no header row at all; report it the same way as any
    # other unusable header instead of failing with an IndexError below.
    raise Exception('infections table: invalid header')
header_elements = lines[0].split()
# The header must name every known district exactly once: same set of names
# and same count (the count comparison rules out duplicates).
if set(header_elements) != district_pops.keys() or \
        len(header_elements) != len(district_pops.keys()):
    raise Exception('infections table: invalid header')
for line_count, line in enumerate(lines[1:], start=1):
    fields = line.split()
    # Each data row is one date column followed by one value per district.
    if len(header_elements) != len(fields) - 1:
        raise Exception('infections table: wrong number of elements on line %s'
                        % line_count)
    try:
        datetime.date.fromisoformat(fields[0])
    except ValueError as err:
        raise Exception('infections table: bad ISO date on line %s'
                        % line_count) from err
    for field in fields[1:]:
        try:
            int(field)
        except ValueError as err:
            raise Exception('infections table: bad value on line %s'
                            % line_count) from err

# Parse first table file line for the names and order of districts.
# db maps district abbreviation -> date string -> record for that day.
sorted_districts = lines[0].split()
db = {name: {} for name in sorted_districts}

# Fill the DB with the daily count of new infections for every district and
# date. Column 0 of a row is the date; column k (k >= 1) belongs to the k-th
# district named in the header.
sorted_dates = []
for row in lines[1:]:
    fields = row.split()
    date = fields[0]
    sorted_dates.append(date)
    for column, district in enumerate(sorted_districts, start=1):
        db[district][date] = {'new_infections': int(fields[column])}
sorted_dates.sort()

# The last column of LaGeSo's table is not a real district but the total for
# all of Berlin. Those numbers are the most interesting ones, so the table
# views should show them first: move that entry to the front of the district
# order.
sum_district = sorted_districts[-1]
sorted_districts = [sum_district] + sorted_districts[:-1]

# Abort on any day whose Berlin-wide figure differs from the total of the
# individual districts. LaGeSo does occasionally publish data with this
# problem; it then has to be corrected by hand in the table file, because a
# human should judge which mistake was probably made.
single_districts = [d for d in sorted_districts if d != sum_district]
for date in sorted_dates:
    day_sum = sum(db[d][date]['new_infections'] for d in single_districts)
    if day_sum != db[sum_district][date]['new_infections']:
        raise Exception('Questionable district infection sum in %s' % date)

# Enhance DB with data about weekly sums, averages, incidences per day. Ignore
# days that have less than 6 predecessors (we can only know a weekly average if
# we have a whole week of data).
for i in range(len(sorted_dates)): if i < 6: continue date = sorted_dates[i] week_dates = [] for j in range(7): week_dates += [sorted_dates[i - j]] for district in sorted_districts: district_pop = district_pops[district] week_sum = 0 for week_date in week_dates: week_sum += db[district][week_date]['new_infections'] db[district][date]['week_sum'] = week_sum db[district][date]['week_average'] = week_sum / 7 db[district][date]['week_incidence'] = (week_sum / district_pop) * 100000 # Optimized for web browser viewing. import calendar if output_type == 'html': print("""
Updated daily at 9pm based on data from the "Senatsverwaltung für Gesundheit, Pflege und Gleichstellung". Source code. Plain text view (optimized for terminal curl).
""") sorted_dates.reverse() for district in sorted_districts: # Wrap in div because the vertical orientation otherwise fails # in Chromium. print(' | %s | ' %
translate[district])
print('||||||
---|---|---|---|---|---|---|---|
date | ') print('? | ') for district in sorted_districts: print('%s | ' % (translate[district], district)) print('|||||
%s (%s) | ' % (date, weekday))
print('
| ')
for district in sorted_districts:
district_data = db[district][date]
week_sum = week_avg = week_inc = '?'
new_infections = district_data['new_infections']
if 'week_sum' in district_data:
week_sum = '%s' % district_data['week_sum']
if 'week_average' in district_data:
week_avg = '%.1f' % district_data['week_average']
if 'week_incidence' in district_data:
week_inc = '%.1f' % district_data['week_incidence']
print('')
print('
| ')
print('