X-Git-Url: https://plomlompom.com/repos/processes?a=blobdiff_plain;f=enhance_table.py;h=774b333a77cf96951609c4363f219f96d158244a;hb=ea8a53f23b20bebeeabb6716a5587731f60b28d5;hp=3860ea61b4cc74409718a59b268689043c787d88;hpb=c47e8179f8330b4b04c7c97e4c2a74272825b8fb;p=berlin-corona-table diff --git a/enhance_table.py b/enhance_table.py index 3860ea6..774b333 100755 --- a/enhance_table.py +++ b/enhance_table.py @@ -17,11 +17,42 @@ district_pops = { 'sum': 3754418, } -f = open('daily_infections_table.txt', 'r') +# Read infections table file lines. +import sys +if len(sys.argv) != 2: + print('Expecting infections table file path as only argument.') + exit(1) +infections_table = sys.argv[1] +f = open(infections_table, 'r') lines = f.readlines() f.close() -# Parse first table file line for the names and order of districts. +# Basic input validation. +import datetime +header_elements = lines[0].split() +if set(header_elements) != district_pops.keys() or \ + len(header_elements) != len(district_pops.keys()): + raise Exception('infections table: invalid header') +line_count = 0 +for line in lines[1:]: + line_count += 1 + fields = line.split() + if len(header_elements) != len(fields) - 1: + raise Exception('infections table: too many elements on line %s', + line_count) + try: + datetime.date.fromisoformat(fields[0]) + except ValueError: + raise Exception('infections table: bad ISO date on line %s', + line_count) + for field in fields[1:]: + try: + int(field) + except ValueError: + raise Exception('infections table: bad value on line %s', + line_count) + +# Parse first table file line for the names and order of districts. db = {} sorted_districts = [] for header in lines[0].split(): @@ -40,11 +71,14 @@ for line in lines[1:]: db[district][date] = {'new_infections': int(district_data)} sorted_dates.sort() +# In LaGeSo's data, the last "district" is actually the sum of all districts / +# the whole of Berlin. +# # Fail on any day where the "sum" district's new infections are not the proper # sum of the individual districts new infections. Yes, sometimes Lageso sends # data that is troubled in this way. It will then have to be fixed manually in # the table file, since we should have a human look at what mistake was -# probably made. +# probably made. for date in sorted_dates: sum_district = sorted_districts[-1] day_sum = 0 @@ -60,9 +94,9 @@ for i in range(len(sorted_dates)): if i < 6: continue date = sorted_dates[i] - week_dates = [] + week_dates = [] for j in range(7): - week_dates += [sorted_dates[i - j]] + week_dates += [sorted_dates[i - j]] for district in sorted_districts: district_pop = district_pops[district] week_sum = 0 @@ -72,6 +106,33 @@ for i in range(len(sorted_dates)): db[district][date]['week_average'] = week_sum / 7 db[district][date]['week_incidence'] = (week_sum / district_pop) * 100000 +# Explain what this is. +intro = """Table of Berlin's Corona infection number development by districts. +Updated daily around 9pm. + +Abbrevations/explanations: + +CW: Charlottenburg-Wilmersdorf +FK: Friedrichshain-Kreuzberg +Li: Lichtenberg +MH: Marzahn-Hellersdorf +Mi: Mitte +Ne: Neukölln +Pa: Pankow +Re: Reinickendorf +Sp: Spandau +SZ: Steglitz-Zehlendorf +TS: Tempelhof-Schöneberg +TK: Treptow-Köpenick +sum: sum for all the districts +wsum: sum for last 7 days +wavg: per-day average of new infections for last 7 days +winc: incidence (x per 100k inhabitants) of new infections for last 7 days + +Source code: https://plomlompom.com/repos/?p=berlin-corona-table +""" +print(intro) + # Output table of enhanced daily infection data, newest on top, separated into # 7-day units. sorted_dates.reverse() @@ -82,9 +143,9 @@ for date in sorted_dates: if weekday_count == 0: print(' '*11, ' '.join(sorted_districts[:-1]), sorted_districts[-1], 'wsum', ' wavg', 'winc') - week_start_date = date + week_start_date = date - # Day data line. + # Day data line. new_infections = [] for district in sorted_districts: new_infections += [db[district][date]['new_infections']] @@ -92,13 +153,13 @@ for date in sorted_dates: sum_district = sorted_districts[-1] sum_district_data = db[sum_district][date] if 'week_sum' in sum_district_data: - week_sum = '%4s' % sum_district_data['week_sum'] + week_sum = '%4s' % sum_district_data['week_sum'] if 'week_average' in sum_district_data: - week_avg = '%5.1f' % sum_district_data['week_average'] + week_avg = '%5.1f' % sum_district_data['week_average'] if 'week_incidence' in sum_district_data: - week_inc = '%4.1f' % sum_district_data['week_incidence'] + week_inc = '%4.1f' % sum_district_data['week_incidence'] print(date, ' '.join(['%3s' % infections for infections in new_infections]), - week_sum, week_avg, week_inc) + week_sum, week_avg, week_inc) # Maintain 7-day cycle. weekday_count += 1