#!/usr/bin/env python3
# Population per district abbreviation, figures as per Wikipedia.
district_pops = {
    'CW': 342332,
    'FK': 289762,
    'Li': 291452,
    'MH': 268548,
    'Mi': 384172,
    'Ne': 329691,
    'Pa': 407765,
    'Re': 265225,
    'Sp': 243977,
    'SZ': 308697,
    'TS': 351644,
    'TK': 271153,
}
# The 'sum' pseudo-district holds the total over all twelve districts above
# (they add up to 3754418); it is added last so it stays the final key.
district_pops['sum'] = sum(district_pops.values())
# Read infections table path and output type from the command line.
import sys

if len(sys.argv) != 3:
    # sys.exit() with a string argument writes it to stderr and exits with
    # status 1. This keeps the usage error out of redirected table output
    # and does not depend on the exit() helper that only the site module
    # provides.
    sys.exit('Expecting infections table file path and output type as only '
             'arguments.')
infections_table = sys.argv[1]
output_type = sys.argv[2]

# Read infections table file lines. The context manager closes the file even
# if reading raises.
with open(infections_table, 'r') as f:
    lines = f.readlines()
# Basic input validation: the header must name exactly the known districts,
# and every following line must consist of an ISO date followed by one
# integer per district. Raises Exception with a descriptive message on the
# first problem found.
import datetime

# An empty file has no header line; fail with a clear message instead of an
# IndexError on lines[0].
if not lines:
    raise Exception('infections table: empty file')
header_elements = lines[0].split()
# The length comparison on top of the set comparison catches district names
# that appear more than once in the header.
if set(header_elements) != district_pops.keys() or \
        len(header_elements) != len(district_pops):
    raise Exception('infections table: invalid header')
# Line numbers in error messages are 1-based positions in the file; the
# header is line 1, so data lines start at 2.
for line_number, line in enumerate(lines[1:], start=2):
    fields = line.split()
    # Expect one date column plus one column per header element. This
    # triggers for too few as well as too many columns.
    if len(fields) != len(header_elements) + 1:
        raise Exception(
            'infections table: wrong number of elements on line %s'
            % line_number)
    try:
        datetime.date.fromisoformat(fields[0])
    except ValueError as err:
        raise Exception('infections table: bad ISO date on line %s'
                        % line_number) from err
    for field in fields[1:]:
        try:
            int(field)
        except ValueError as err:
            raise Exception('infections table: bad value on line %s'
                            % line_number) from err
# Parse first table file line for the names and order of districts, and give
# each district an empty per-date mapping in the DB.
sorted_districts = lines[0].split()
db = {district: {} for district in sorted_districts}

# Seed DB with daily new infections data per district, per date:
# db[district][date] = {'new_infections': <int>}.
sorted_dates = []
seen_dates = set()
for line in lines[1:]:
    date, *district_fields = line.split()
    # A repeated date would overwrite the earlier row in db while still
    # being listed twice in sorted_dates, so every 7-row window containing
    # it would count that day twice. Refuse such a table instead.
    if date in seen_dates:
        raise Exception('infections table: duplicate date %s' % date)
    seen_dates.add(date)
    sorted_dates.append(date)
    # Fields follow the header's district order; the validation earlier in
    # this script ensures there is exactly one field per district.
    for district, district_data in zip(sorted_districts, district_fields):
        db[district][date] = {'new_infections': int(district_data)}
# The dates are sorted as plain strings.
sorted_dates.sort()
# The last column of LaGeSo's data is not a real district but the total over
# all districts, i.e. the whole of Berlin.
#
# Abort on any date where that total does not equal the sum of the individual
# districts' new infections. LaGeSo does occasionally publish data that is
# inconsistent in this way. Such rows have to be corrected by hand in the
# table file, because a human should judge which mistake was probably made.
sum_district = sorted_districts[-1]
individual_districts = sorted_districts[:-1]
for date in sorted_dates:
    day_sum = sum(db[name][date]['new_infections']
                  for name in individual_districts)
    reported_sum = db[sum_district][date]['new_infections']
    if reported_sum != day_sum:
        raise Exception('Questionable district infection sum in %s' % date)
# Add weekly sum, per-day average and incidence to the DB for every date that
# has at least 6 predecessors; a weekly figure needs a full week of data, so
# the first 6 dates get no weekly keys at all.
# NOTE(review): the window is the 7 most recent table rows, which equals 7
# calendar days only if the table has no missing dates -- confirm.
for newest in range(6, len(sorted_dates)):
    date = sorted_dates[newest]
    week_dates = sorted_dates[newest - 6:newest + 1]
    for district in sorted_districts:
        district_pop = district_pops[district]
        week_sum = sum(db[district][week_date]['new_infections']
                       for week_date in week_dates)
        day_data = db[district][date]
        day_data['week_sum'] = week_sum
        day_data['week_average'] = week_sum / 7
        # Incidence: weekly new infections per 100000 inhabitants.
        day_data['week_incidence'] = (week_sum / district_pop) * 100000
# Optimized for web browser viewing.
# HTML output branch: prints a header listing all districts, then, per date
# (newest first) and per district, that day's new infections plus the weekly
# sum, average and incidence where the DB has them.
# NOTE(review): the string literals in this branch look like their HTML
# markup has been stripped (literals broken across two lines, bare ' | '
# separators, empty strings) -- restore them from the original source before
# relying on this branch.
if output_type == 'html':
print('')
print('')
print('
')
print('')
print('date | ')
# One header entry per district, in table column order.
for district in sorted_districts:
print('%s | ' % district)
print('
')
# Newest date first; this reverses the shared sorted_dates list in place.
sorted_dates.reverse()
for date in sorted_dates:
print('')
print('%s | ' % date)
for district in sorted_districts:
district_data = db[district][date]
# Weekly values default to empty strings; they are only filled in when the
# corresponding key is present for this date.
week_sum = week_avg = week_inc = ''
new_infections = district_data['new_infections']
if 'week_sum' in district_data:
week_sum = '%s' % district_data['week_sum']
if 'week_average' in district_data:
week_avg = '%.1f' % district_data['week_average']
if 'week_incidence' in district_data:
week_inc = '%.1f' % district_data['week_incidence']
print('')
print('')
print('new | %s | ' % new_infections)
print('wsum | %s | ' % week_sum)
print('wavg | %s | ' % week_avg)
print('winc | %s | ' % week_inc)
print(' ')
print(' | ')
print('
')
print('
')
print('')
# Optimized for in-terminal curl.
elif output_type == 'txt':
# Explain what this is.
intro = \
"""Table of Berlin's Corona infection number development by districts.
Updated daily around 9pm.
Abbrevations/explanations:
CW: Charlottenburg-Wilmersdorf
FK: Friedrichshain-Kreuzberg
Li: Lichtenberg
MH: Marzahn-Hellersdorf
Mi: Mitte
Ne: Neukölln
Pa: Pankow
Re: Reinickendorf
Sp: Spandau
SZ: Steglitz-Zehlendorf
TS: Tempelhof-Schöneberg
TK: Treptow-Köpenick
sum: sum for all the districts
wsum: sum for last 7 days
wavg: per-day average of new infections for last 7 days
winc: incidence (x per 100k inhabitants) of new infections for last 7 days
Source code: https://plomlompom.com/repos/?p=berlin-corona-table
"""
print(intro)
# Output table of enhanced daily infection data, newest on top,
# separated into 7-day units.
sorted_dates.reverse()
weekday_count = 0
sum_district = sorted_districts[-1]
for date in sorted_dates:
# Week table header.
if weekday_count == 0:
print(' '*11, ' '.join(sorted_districts[:-1]),
sorted_districts[-1], 'wsum', ' wavg', 'winc')
week_start_date = date
# Day data line.
new_infections = []
for district in sorted_districts:
new_infections += [db[district][date]['new_infections']]
week_sum = week_avg = week_inc = ''
sum_district_data = db[sum_district][date]
if 'week_sum' in sum_district_data:
week_sum = '%4s' % sum_district_data['week_sum']
if 'week_average' in sum_district_data:
week_avg = '%5.1f' % sum_district_data['week_average']
if 'week_incidence' in sum_district_data:
week_inc = '%4.1f' % sum_district_data['week_incidence']
print(date, ' '.join(['%3s' % infections
for infections in new_infections]),
week_sum, week_avg, week_inc)
# Maintain 7-day cycle.
weekday_count += 1
if weekday_count != 7:
continue
weekday_count = 0
# After each 7 days, print summary for individual districts.
weekly_sums = []
weekly_avgs = []
weekly_incs = []
for district in sorted_districts[:-1]:
weekly_sums += [db[district][week_start_date]['week_sum']]
weekly_avgs += [db[district][week_start_date]['week_average']]
weekly_incs += [db[district][week_start_date]['week_incidence']]
print()
print('district stats for week from %s to %s:' % (date, week_start_date))
print(' '*7, ' '.join(sorted_districts[:-1]))
print('wsum', ' '.join(['%5.1f' % wsum for wsum in weekly_sums]))
print('wavg', ' '.join(['%5.1f' % wavg for wavg in weekly_avgs]))
print('winc', ' '.join(['%5.1f' % winc for winc in weekly_incs]))
print()