#!/usr/bin/env python3
"""Output the daily infections table enhanced with weekly statistics.

Reads the whitespace-separated table in TABLE_PATH and prints it newest day
first, in units of 7 days, with 7-day sums, averages and incidences (per
100,000 inhabitants) added. After each full 7-day unit, a per-district
summary of those weekly statistics is printed.

Expected table layout, as consumed by parse_table():
  * first line: the district names, the last one being the "sum" district;
  * every further line: a date followed by one integer per district.
"""
# Provenance: added as enhance_table.py by Christian Heller,
# Tue, 30 Jun 2020 21:16:57 +0000 (+0200), commit
# c47e8179f8330b4b04c7c97e4c2a74272825b8fb ("Add script to output enhanced
# table data."), https://plomlompom.com/repos/?p=berlin-corona-table

# District population numbers as per Wikipedia.
district_pops = {
    'CW': 342332,
    'FK': 289762,
    'Li': 291452,
    'MH': 268548,
    'Mi': 384172,
    'Ne': 329691,
    'Pa': 407765,
    'Re': 265225,
    'Sp': 243977,
    'SZ': 308697,
    'TS': 351644,
    'TK': 271153,
    'sum': 3754418,
}

# Input file, resolved relative to the current working directory.
TABLE_PATH = 'daily_infections_table.txt'

# Keys of the weekly statistics and the formats used for them in day lines.
DAY_LINE_WEEK_FORMATS = (
    ('week_sum', '%4s'),
    ('week_average', '%5.1f'),
    ('week_incidence', '%4.1f'),
)

# Row labels and keys of the per-district weekly summary.
DISTRICT_STATS_ROWS = (
    ('wsum', 'week_sum'),
    ('wavg', 'week_average'),
    ('winc', 'week_incidence'),
)


def parse_table(lines):
    """Parse the table file's lines into a DB of daily new infections.

    Returns a tuple (db, sorted_districts, sorted_dates):
      * db maps district -> date -> {'new_infections': int};
      * sorted_districts lists the districts in header order;
      * sorted_dates lists the dates in ascending (string) order.

    Blank lines are skipped. Fields beyond the expected ones are ignored.
    Raises ValueError if the header line is missing or empty, if a data line
    has fewer fields than a date plus one number per district, or if a number
    is not an integer.
    """
    rows = [line.split() for line in lines]
    if not rows or not rows[0]:
        raise ValueError('Table is missing its district header line')

    # The first line provides the names and order of districts.
    sorted_districts = rows[0]
    db = {district: {} for district in sorted_districts}

    # Seed DB with daily new infections data per district, per date.
    sorted_dates = []
    for line_number, fields in enumerate(rows[1:], start=2):
        if not fields:
            continue
        if len(fields) < len(sorted_districts) + 1:
            raise ValueError('Too few fields in table line %s: %s'
                             % (line_number, ' '.join(fields)))
        date = fields[0]
        sorted_dates.append(date)
        for district, district_data in zip(sorted_districts, fields[1:]):
            db[district][date] = {'new_infections': int(district_data)}
    sorted_dates.sort()
    return db, sorted_districts, sorted_dates


def check_sums(db, sorted_districts, sorted_dates):
    """Fail on any day whose "sum" value is not the sum of the districts.

    Yes, sometimes Lageso sends data that is troubled in this way. It will
    then have to be fixed manually in the table file, since we should have a
    human look at what mistake was probably made.

    The last entry of sorted_districts is taken to be the "sum" district.
    Raises Exception naming the first offending date.
    """
    sum_district = sorted_districts[-1]
    for date in sorted_dates:
        day_sum = sum(db[district][date]['new_infections']
                      for district in sorted_districts[:-1])
        if day_sum != db[sum_district][date]['new_infections']:
            raise Exception('Questionable district infection sum in %s' % date)


def add_weekly_stats(db, sorted_districts, sorted_dates, pops=None):
    """Enhance DB with weekly sums, averages and incidences per day.

    For every date with at least 6 predecessors in sorted_dates, and for every
    district, the keys 'week_sum', 'week_average' and 'week_incidence' (per
    100,000 inhabitants) are added to db[district][date]. Days with fewer
    predecessors are left untouched, since we can only know a weekly average
    if we have a whole week of data.

    pops maps district -> population; it defaults to district_pops.

    NOTE(review): a "week" is the 7 most recent table rows; nothing here
    checks that the dates are consecutive calendar days.
    """
    if pops is None:
        pops = district_pops
    for i in range(6, len(sorted_dates)):
        date = sorted_dates[i]
        week_dates = sorted_dates[i - 6:i + 1]
        for district in sorted_districts:
            district_pop = pops[district]
            week_sum = sum(db[district][week_date]['new_infections']
                           for week_date in week_dates)
            day_data = db[district][date]
            day_data['week_sum'] = week_sum
            day_data['week_average'] = week_sum / 7
            day_data['week_incidence'] = (week_sum / district_pop) * 100000


def render_table(db, sorted_districts, sorted_dates):
    """Yield the output lines of the enhanced table, newest day on top.

    The output is separated into 7-day units. Each unit starts with a header
    line whose cells are formatted to the same widths as the data cells below
    it, so that headers and columns line up. Day lines show the weekly
    statistics of the "sum" district (the last of sorted_districts) where
    available. Each complete unit is followed by the weekly statistics of the
    individual districts, as of the unit's newest day.
    """
    sum_district = sorted_districts[-1]
    single_districts = sorted_districts[:-1]
    header = ' '.join([' ' * 10]
                      + ['%3s' % district for district in sorted_districts]
                      + ['wsum', ' wavg', 'winc'])
    stats_header = ' '.join([' ' * 4] + ['%5s' % district
                                         for district in single_districts])

    weekday_count = 0
    week_start_date = None
    for date in reversed(sorted_dates):

        # Week table header.
        if weekday_count == 0:
            yield header
            week_start_date = date

        # Day data line.
        new_infections = ['%3s' % db[district][date]['new_infections']
                          for district in sorted_districts]
        sum_district_data = db[sum_district][date]
        week_cells = [fmt % sum_district_data[key]
                      if key in sum_district_data else ''
                      for key, fmt in DAY_LINE_WEEK_FORMATS]
        yield ' '.join([date] + new_infections + week_cells)

        # Maintain 7-day cycle.
        weekday_count += 1
        if weekday_count != 7:
            continue
        weekday_count = 0

        # After each 7 days, print summary for individual districts. The
        # unit's newest day has 6 predecessors inside the unit itself, so its
        # weekly statistics are set if add_weekly_stats() ran on this DB.
        stats_lines = []
        for label, key in DISTRICT_STATS_ROWS:
            cells = ['%5.1f' % db[district][week_start_date][key]
                     for district in single_districts]
            stats_lines.append(' '.join([label] + cells))
        yield ''
        yield ('district stats for week from %s to %s:'
               % (date, week_start_date))
        yield stats_header
        for stats_line in stats_lines:
            yield stats_line
        yield ''


def main():
    """Read the table file, validate and enhance it, and print the result."""
    with open(TABLE_PATH, 'r') as f:
        lines = f.readlines()
    db, sorted_districts, sorted_dates = parse_table(lines)
    check_sums(db, sorted_districts, sorted_dates)
    add_weekly_stats(db, sorted_districts, sorted_dates)
    for line in render_table(db, sorted_districts, sorted_dates):
        print(line)


if __name__ == '__main__':
    main()