X-Git-Url: https://plomlompom.com/repos/?p=berlin-corona-table;a=blobdiff_plain;f=enhance_table.py;h=66614c36433a9e64422fe1768ee11ef217624fde;hp=d9ca849c4901232bc12e501b462714c749a9bf12;hb=e8850998bd3f831bf1291b0d5ea02d53cc7f2802;hpb=2724a3abcb796cf91bbcac204569594765ae6eb7 diff --git a/enhance_table.py b/enhance_table.py index d9ca849..66614c3 100755 --- a/enhance_table.py +++ b/enhance_table.py @@ -17,11 +17,65 @@ district_pops = { 'sum': 3754418, } -f = open('daily_infections_table.txt', 'r') +# Map abbreviations to full names. +translate = { + 'CW': 'Charlottenburg-Wilmersdorf', + 'FK': 'Friedrichshain-Kreuzberg', + 'Li': 'Lichtenberg', + 'MH': 'Marzahn-Hellersdorf', + 'Mi': 'Mitte', + 'Ne': 'Neukölln', + 'Pa': 'Pankow', + 'Re': 'Reinickendorf', + 'Sp': 'Spandau', + 'SZ': 'Steglitz-Zehlendorf', + 'TS': 'Tempelhof-Schöneberg', + 'TK': 'Treptow-Köpenick', + 'sum': 'all of Berlin', + 'wsum': 'sum for last 7 days', + 'wavg': 'per-day average of new infections for last 7 days', + 'winc': 'incidence (x per 100k inhabitants) of new infections for last 7 days', +} + +# Read infections table path and output type. +import sys +if len(sys.argv) != 3: + print('Expecting infections table file path and output type as only arguments.') + exit(1) +infections_table = sys.argv[1] +output_type = sys.argv[2] + +# Read infections table file lines. +f = open(infections_table, 'r') lines = f.readlines() f.close() -# Parse first table file line for the names and order of districts. +# Basic input validation. +import datetime +header_elements = lines[0].split() +if set(header_elements) != district_pops.keys() or \ + len(header_elements) != len(district_pops.keys()): + raise Exception('infections table: invalid header') +line_count = 0 +for line in lines[1:]: + line_count += 1 + fields = line.split() + if len(header_elements) != len(fields) - 1: + raise Exception('infections table: too many elements on line %s', + line_count) + try: + datetime.date.fromisoformat(fields[0]) + except ValueError: + raise Exception('infections table: bad ISO date on line %s', + line_count) + for field in fields[1:]: + try: + int(field) + except ValueError: + raise Exception('infections table: bad value on line %s', + line_count) + +# Parse first table file line for the names and order of districts. db = {} sorted_districts = [] for header in lines[0].split(): @@ -40,11 +94,14 @@ for line in lines[1:]: db[district][date] = {'new_infections': int(district_data)} sorted_dates.sort() +# In LaGeSo's data, the last "district" is actually the sum of all districts / +# the whole of Berlin. +# # Fail on any day where the "sum" district's new infections are not the proper # sum of the individual districts new infections. Yes, sometimes Lageso sends # data that is troubled in this way. It will then have to be fixed manually in # the table file, since we should have a human look at what mistake was -# probably made. +# probably made. for date in sorted_dates: sum_district = sorted_districts[-1] day_sum = 0 @@ -60,9 +117,9 @@ for i in range(len(sorted_dates)): if i < 6: continue date = sorted_dates[i] - week_dates = [] + week_dates = [] for j in range(7): - week_dates += [sorted_dates[i - j]] + week_dates += [sorted_dates[i - j]] for district in sorted_districts: district_pop = district_pops[district] week_sum = 0 @@ -72,79 +129,122 @@ for i in range(len(sorted_dates)): db[district][date]['week_average'] = week_sum / 7 db[district][date]['week_incidence'] = (week_sum / district_pop) * 100000 -# Explain what this is. -intro = """Table of Berlin's Corona infection number development by districts. -Updated daily around 9pm. +# Optimized for web browser viewing. +if output_type == 'html': + print('') + print('') + print('') + print('') + print('') + sorted_dates.reverse() + sum_district = sorted_districts[-1] + for district in sorted_districts: + long_form = translate[district] + if sum_district == district: + print('' % long_form) + else: + print('' % long_form) + print('') + for date in sorted_dates: + print('') + print('' % date) + long_wsum = translate['wsum'] + long_wavg = translate['wavg'] + long_winc = translate['winc'] + for district in sorted_districts: + district_data = db[district][date] + week_sum = week_avg = week_inc = '' + new_infections = district_data['new_infections'] + if 'week_sum' in district_data: + week_sum = '%s' % district_data['week_sum'] + if 'week_average' in district_data: + week_avg = '%.1f' % district_data['week_average'] + if 'week_incidence' in district_data: + week_inc = '%.1f' % district_data['week_incidence'] + print('') + print('') + print('
date%s%s
%s') + print(new_infections) + if district == sum_district: + print('
') + else: + print('
') + print('' % (long_wsum, week_sum)) + print('' % (long_wavg, week_avg)) + print('' % (long_winc, week_inc)) + print('
%s%s
%s%s
%s%s
') + print('
') + print('') -Abbrevations/explanations: +# Optimized for in-terminal curl. +elif output_type == 'txt': -CW: Charlottenburg-Wilmersdorf -FK: Friedrichshain-Kreuzberg -Li: Lichtenberg -MH: Marzahn-Hellersdorf -Mi: Mitte -Ne: Neukölln -Pa: Pankow -Re: Reinickendorf -Sp: Spandau -SZ: Steglitz-Zehlendorf -TS: Tempelhof-Schöneberg -TK: Treptow-Köpenick -sum: sum for all the districts -wsum: sum for last 7 days -wavg: per-day average of new infections for last 7 days -winc: incidence (x per 100k inhabitants) of new infections for last 7 days + # Explain what this is. + intro = \ +"""Table of Berlin's Corona infection number development by districts. +Updated daily around 9pm. +Abbrevations/explanations: +""" + for k in translate: + intro += "%s: %s\n" % (k, translate[k]) + intro += """ Source code: https://plomlompom.com/repos/?p=berlin-corona-table """ -print(intro) + print(intro) -# Output table of enhanced daily infection data, newest on top, separated into -# 7-day units. -sorted_dates.reverse() -weekday_count = 0 -for date in sorted_dates: + # Output table of enhanced daily infection data, newest on top, + # separated into 7-day units. + sorted_dates.reverse() + weekday_count = 0 + sum_district = sorted_districts[-1] + for date in sorted_dates: - # Week table header. - if weekday_count == 0: - print(' '*11, ' '.join(sorted_districts[:-1]), - sorted_districts[-1], 'wsum', ' wavg', 'winc') - week_start_date = date + # Week table header. + if weekday_count == 0: + print(' '*11, ' '.join(sorted_districts[:-1]), + sorted_districts[-1], 'wsum', ' wavg', 'winc') + week_start_date = date - # Day data line. - new_infections = [] - for district in sorted_districts: - new_infections += [db[district][date]['new_infections']] - week_sum = week_avg = week_inc = '' - sum_district = sorted_districts[-1] - sum_district_data = db[sum_district][date] - if 'week_sum' in sum_district_data: - week_sum = '%4s' % sum_district_data['week_sum'] - if 'week_average' in sum_district_data: - week_avg = '%5.1f' % sum_district_data['week_average'] - if 'week_incidence' in sum_district_data: - week_inc = '%4.1f' % sum_district_data['week_incidence'] - print(date, ' '.join(['%3s' % infections for infections in new_infections]), - week_sum, week_avg, week_inc) - - # Maintain 7-day cycle. - weekday_count += 1 - if weekday_count != 7: - continue - weekday_count = 0 + # Day data line. + new_infections = [] + for district in sorted_districts: + new_infections += [db[district][date]['new_infections']] + week_sum = week_avg = week_inc = '' + sum_district_data = db[sum_district][date] + if 'week_sum' in sum_district_data: + week_sum = '%4s' % sum_district_data['week_sum'] + if 'week_average' in sum_district_data: + week_avg = '%5.1f' % sum_district_data['week_average'] + if 'week_incidence' in sum_district_data: + week_inc = '%4.1f' % sum_district_data['week_incidence'] + print(date, ' '.join(['%3s' % infections + for infections in new_infections]), + week_sum, week_avg, week_inc) - # After each 7 days, print summary for individual districts. - weekly_sums = [] - weekly_avgs = [] - weekly_incs = [] - for district in sorted_districts[:-1]: - weekly_sums += [db[district][week_start_date]['week_sum']] - weekly_avgs += [db[district][week_start_date]['week_average']] - weekly_incs += [db[district][week_start_date]['week_incidence']] - print() - print('district stats for week from %s to %s:' % (date, week_start_date)) - print(' '*7, ' '.join(sorted_districts[:-1])) - print('wsum', ' '.join(['%5.1f' % wsum for wsum in weekly_sums])) - print('wavg', ' '.join(['%5.1f' % wavg for wavg in weekly_avgs])) - print('winc', ' '.join(['%5.1f' % winc for winc in weekly_incs])) - print() + # Maintain 7-day cycle. + weekday_count += 1 + if weekday_count != 7: + continue + weekday_count = 0 + + # After each 7 days, print summary for individual districts. + weekly_sums = [] + weekly_avgs = [] + weekly_incs = [] + for district in sorted_districts[:-1]: + weekly_sums += [db[district][week_start_date]['week_sum']] + weekly_avgs += [db[district][week_start_date]['week_average']] + weekly_incs += [db[district][week_start_date]['week_incidence']] + print() + print('district stats for week from %s to %s:' % (date, week_start_date)) + print(' '*7, ' '.join(sorted_districts[:-1])) + print('wsum', ' '.join(['%5.1f' % wsum for wsum in weekly_sums])) + print('wavg', ' '.join(['%5.1f' % wavg for wavg in weekly_avgs])) + print('winc', ' '.join(['%5.1f' % winc for winc in weekly_incs])) + print()