#!/usr/bin/env python3
# Reads a whitespace-separated table of daily new infections per district
# (header row of district abbreviations, then one row per ISO date), validates
# it, and hands the raw lines on to the processing code below.
#
# Usage: <script> INFECTIONS_TABLE_PATH OUTPUT_TYPE

import datetime
import sys

# District population numbers as per Wikipedia.
district_pops = {
    'CW': 342332,
    'FK': 289762,
    'Li': 291452,
    'MH': 268548,
    'Mi': 384172,
    'Ne': 329691,
    'Pa': 407765,
    'Re': 265225,
    'Sp': 243977,
    'SZ': 308697,
    'TS': 351644,
    'TK': 271153,
    'sum': 3754418,
}

# Map abbreviations to full names.
translate = {
    'CW': 'Charlottenburg-Wilmersdorf',
    'FK': 'Friedrichshain-Kreuzberg',
    'Li': 'Lichtenberg',
    'MH': 'Marzahn-Hellersdorf',
    'Mi': 'Mitte',
    'Ne': 'Neukölln',
    'Pa': 'Pankow',
    'Re': 'Reinickendorf',
    'Sp': 'Spandau',
    'SZ': 'Steglitz-Zehlendorf',
    'TS': 'Tempelhof-Schöneberg',
    'TK': 'Treptow-Köpenick',
    'sum': 'all of Berlin',
    'wsum': 'sum for last 7 days',
    'wavg': 'per-day average of new infections for last 7 days',
    'winc': 'incidence (x per 100k inhabitants) of new infections for last 7 days',
}

# Read infections table path and output type.
if len(sys.argv) != 3:
    print('Expecting infections table file path and output type as only arguments.')
    # sys.exit() instead of the bare exit() helper: the latter is injected by
    # the site module and is not guaranteed to exist in every interpreter setup.
    sys.exit(1)
infections_table = sys.argv[1]
output_type = sys.argv[2]

# Read infections table file lines. The context manager closes the file even
# if reading fails.
with open(infections_table, 'r') as f:
    lines = f.readlines()

# Basic input validation.
#
# The header must name exactly the districts we have population numbers for,
# each exactly once (the length check catches duplicates that the set
# comparison alone would hide). An empty file has no header at all.
if not lines:
    raise Exception('infections table: invalid header')
header_elements = lines[0].split()
if set(header_elements) != district_pops.keys() or \
   len(header_elements) != len(district_pops.keys()):
    raise Exception('infections table: invalid header')

# Every following row must be: ISO date, then one integer per header column.
# line_count numbers the data rows starting at 1; the header row is not
# counted.
line_count = 0
for line in lines[1:]:
    line_count += 1
    fields = line.split()
    if len(header_elements) != len(fields) - 1:
        # Fires for too few as well as too many columns.
        raise Exception('infections table: wrong number of elements on line %s' % line_count)
    try:
        datetime.date.fromisoformat(fields[0])
    except ValueError as err:
        raise Exception('infections table: bad ISO date on line %s' % line_count) from err
    for field in fields[1:]:
        try:
            int(field)
        except ValueError as err:
            raise Exception('infections table: bad value on line %s' % line_count) from err

# Parse first table file line for the names and order of districts.
db = {} sorted_districts = [] for header in lines[0].split(): sorted_districts += [header] db[header] = {} # Seed DB with daily new infections data per district, per date. sorted_dates = [] for line in lines[1:]: fields = line.split() date = fields[0] sorted_dates += [date] for i in range(len(sorted_districts)): district = sorted_districts[i] district_data = fields[i + 1] db[district][date] = {'new_infections': int(district_data)} sorted_dates.sort() # In LaGeSo's data, the last "district" is actually the sum of all districts / # the whole of Berlin. # # Fail on any day where the "sum" district's new infections are not the proper # sum of the individual districts new infections. Yes, sometimes Lageso sends # data that is troubled in this way. It will then have to be fixed manually in # the table file, since we should have a human look at what mistake was # probably made. for date in sorted_dates: sum_district = sorted_districts[-1] day_sum = 0 for district in sorted_districts[:-1]: day_sum += db[district][date]['new_infections'] if day_sum != db[sum_district][date]['new_infections']: raise Exception('Questionable district infection sum in %s' % date) # Enhance DB with data about weekly sums, averages, incidences per day. Ignore # days that have less than 6 predecessors (we can only know a weekly average if # we have a whole week of data). for i in range(len(sorted_dates)): if i < 6: continue date = sorted_dates[i] week_dates = [] for j in range(7): week_dates += [sorted_dates[i - j]] for district in sorted_districts: district_pop = district_pops[district] week_sum = 0 for week_date in week_dates: week_sum += db[district][week_date]['new_infections'] db[district][date]['week_sum'] = week_sum db[district][date]['week_average'] = week_sum / 7 db[district][date]['week_incidence'] = (week_sum / district_pop) * 100000 # Optimized for web browser viewing. if output_type == 'html': print("""
Updated daily at 9pm. Source code. Text view optimized for terminal curl.
date | """) sorted_dates.reverse() sum_district = sorted_districts[-1] for district in sorted_districts: long_form = translate[district] if sum_district == district: print('%s | ' % long_form) else: print('%s | ' % long_form) print('|||||
---|---|---|---|---|---|---|---|
%s | ' % date) long_wsum = translate['wsum'] long_wavg = translate['wavg'] long_winc = translate['winc'] for district in sorted_districts: district_data = db[district][date] week_sum = week_avg = week_inc = '' new_infections = district_data['new_infections'] if 'week_sum' in district_data: week_sum = '%s' % district_data['week_sum'] if 'week_average' in district_data: week_avg = '%.1f' % district_data['week_average'] if 'week_incidence' in district_data: week_inc = '%.1f' % district_data['week_incidence'] print('')
print(new_infections)
if district != sum_district:
print('') print('
| ')
print('