X-Git-Url: https://plomlompom.com/repos/do_day?a=blobdiff_plain;ds=sidebyside;f=enhance_table.py;h=8d0fd335ec2c62e077bb3b34fcd47f10920adb30;hb=7c0bea9a26a3e3a5466f1a75d41f4a48d2106ac2;hp=a626329bcd20236477e60b8bd80b93c38274bcaa;hpb=078d5207c9a6d7ad4611df311a5e11a635f6f30f;p=berlin-corona-table
diff --git a/enhance_table.py b/enhance_table.py
index a626329..8d0fd33 100755
--- a/enhance_table.py
+++ b/enhance_table.py
@@ -17,11 +17,65 @@ district_pops = {
'sum': 3754418,
}
-f = open('daily_infections_table.txt', 'r')
+# Long-form labels for the abbreviations used as table column names.
+translate = {
+    'CW': 'Charlottenburg-Wilmersdorf',
+    'FK': 'Friedrichshain-Kreuzberg',
+    'Li': 'Lichtenberg',
+    'MH': 'Marzahn-Hellersdorf',
+    'Mi': 'Mitte',
+    'Ne': 'Neukölln',
+    'Pa': 'Pankow',
+    'Re': 'Reinickendorf',
+    'Sp': 'Spandau',
+    'SZ': 'Steglitz-Zehlendorf',
+    'TS': 'Tempelhof-Schöneberg',
+    'TK': 'Treptow-Köpenick',
+    'sum': 'all of Berlin',  # the city-wide total column
+    'wsum': 'sum for last 7 days',  # 7-day statistics from here on
+    'wavg': 'per-day average of new infections for last 7 days',
+    'winc': 'incidence (x per 100k inhabitants) of new infections for last 7 days',
+}
+
+# Read infections table path and output type from the command line.
+import sys
+if len(sys.argv) != 3:
+    print('Expecting infections table file path and output type as only arguments.')
+    sys.exit(1)  # sys.exit, not the interactive-only exit() helper from site
+infections_table = sys.argv[1]  # path of the table file opened below
+output_type = sys.argv[2]  # selects the 'html' or 'txt' output branch
+
+# Read infections table file lines.
+f = open(infections_table, 'r')
lines = f.readlines()
f.close()
-# Parse first table file line for the names and order of districts.
+# Basic input validation; raises Exception on the first problem found.
+import datetime
+header_elements = lines[0].split()
+if set(header_elements) != district_pops.keys() or \
+   len(header_elements) != len(district_pops.keys()):  # no duplicate columns
+    raise Exception('infections table: invalid header')
+line_count = 0  # 1-based position among the data lines (header not counted)
+for line in lines[1:]:
+    line_count += 1
+    fields = line.split()
+    if len(header_elements) != len(fields) - 1:  # first field is the date
+        raise Exception('infections table: wrong number of elements on '
+                        'line %s' % line_count)
+    try:
+        datetime.date.fromisoformat(fields[0])
+    except ValueError:
+        raise Exception('infections table: bad ISO date on line %s'
+                        % line_count)
+    for field in fields[1:]:
+        try:
+            int(field)
+        except ValueError:
+            raise Exception('infections table: bad value on line %s'
+                            % line_count)
+
+# Parse first table file line for the names and order of districts.
db = {}
sorted_districts = []
for header in lines[0].split():
@@ -40,11 +94,14 @@ for line in lines[1:]:
db[district][date] = {'new_infections': int(district_data)}
sorted_dates.sort()
+# In LaGeSo's data, the last "district" is actually the sum of all districts /
+# the whole of Berlin.
+#
# Fail on any day where the "sum" district's new infections are not the proper
# sum of the individual districts' new infections. Yes, sometimes LaGeSo sends
# data that is troubled in this way. It will then have to be fixed manually in
# the table file, since we should have a human look at what mistake was
-# probably made.
+# probably made.
for date in sorted_dates:
sum_district = sorted_districts[-1]
day_sum = 0
@@ -60,9 +117,9 @@ for i in range(len(sorted_dates)):
if i < 6:
continue
date = sorted_dates[i]
- week_dates = []
+ week_dates = []
for j in range(7):
- week_dates += [sorted_dates[i - j]]
+ week_dates += [sorted_dates[i - j]]
for district in sorted_districts:
district_pop = district_pops[district]
week_sum = 0
@@ -72,79 +129,130 @@ for i in range(len(sorted_dates)):
db[district][date]['week_average'] = week_sum / 7
db[district][date]['week_incidence'] = (week_sum / district_pop) * 100000
-# Explain what this is.
-intro = """
-Table of Berlin's Corona infection number development by districts, daily
-updated around 9pm.
+# Optimized for web browser viewing.
+if output_type == 'html':
+ print("""
+
+
+
+Table of Berlin's Corona infection number development by districts
+
+Table of Berlin's Corona infection number development by districts
+Updated daily at 9pm. Source code. Text view optimized for terminal curl.
+
+
+date | """)
+ sorted_dates.reverse()
+ sum_district = sorted_districts[-1]
+ for district in sorted_districts:
+ long_form = translate[district]
+ if sum_district == district:
+ print('%s | ' % long_form)
+ else:
+ print('%s | ' % long_form)
+ print('
')
+ for date in sorted_dates:
+ print('')
+ print('%s | ' % date)
+ long_wsum = translate['wsum']
+ long_wavg = translate['wavg']
+ long_winc = translate['winc']
+ for district in sorted_districts:
+ district_data = db[district][date]
+ week_sum = week_avg = week_inc = '(not enough data)'
+ new_infections = district_data['new_infections']
+ if 'week_sum' in district_data:
+ week_sum = '%s' % district_data['week_sum']
+ if 'week_average' in district_data:
+ week_avg = '%.1f' % district_data['week_average']
+ if 'week_incidence' in district_data:
+ week_inc = '%.1f' % district_data['week_incidence']
+ print('')
+ print(new_infections)
+ if district != sum_district:
+ print('')
+ print('')
+ print('%s | %s | ' % (long_wsum, week_sum))
+ print('%s | %s | ' % (long_wavg, week_avg))
+ print('%s | %s | ' % (long_winc, week_inc))
+ print(' ')
+ if district != sum_district:
+ print(' ')
+ print(' | ')
+ print('
')
+ print('
')
+ print('')
-Abbrevations/explanations:
-CW: Charlottenburg-Wilmersdorf
-FK: Friedrichshain-Kreuzberg
-Li: Lichtenberg
-MH: Marzahn-Hellersdorf
-Mi: Mitte
-Ne: Neukölln
-Pa: Pankow
-Re: Reinickendorf
-Sp: Spandau
-SZ: Steglitz-Zehlendorf
-TS: Tempelhof-Schöneberg
-TK: Treptow-Köpenick
-sum: sum for all the districts
-wsum: sum for last 7 days
-wavg: per-day average of new infections for last 7 days
-winc: incidence (x per 100k inhabitants) of new infections for last 7 days
+# Optimized for in-terminal curl.
+elif output_type == 'txt':
+ # Explain what this is.
+ intro = \
+"""Table of Berlin's Corona infection number development by districts.
+Updated daily at 9pm.
+
+Abbrevations/explanations:
+"""
+ for k in translate:
+ intro += "%s: %s\n" % (k, translate[k])
+ intro += """
Source code: https://plomlompom.com/repos/?p=berlin-corona-table
+
+HTML view: https://plomlompom.com/berlin_corona.html
"""
-print(intro)
+ print(intro)
-# Output table of enhanced daily infection data, newest on top, separated into
-# 7-day units.
-sorted_dates.reverse()
-weekday_count = 0
-for date in sorted_dates:
+ # Output table of enhanced daily infection data, newest on top,
+ # separated into 7-day units.
+ sorted_dates.reverse()
+ weekday_count = 0
+ sum_district = sorted_districts[-1]
+ for date in sorted_dates:
- # Week table header.
- if weekday_count == 0:
- print(' '*11, ' '.join(sorted_districts[:-1]),
- sorted_districts[-1], 'wsum', ' wavg', 'winc')
- week_start_date = date
+ # Week table header.
+ if weekday_count == 0:
+ print(' '*11, ' '.join(sorted_districts[:-1]),
+ sorted_districts[-1], 'wsum', ' wavg', 'winc')
+ week_start_date = date
- # Day data line.
- new_infections = []
- for district in sorted_districts:
- new_infections += [db[district][date]['new_infections']]
- week_sum = week_avg = week_inc = ''
- sum_district = sorted_districts[-1]
- sum_district_data = db[sum_district][date]
- if 'week_sum' in sum_district_data:
- week_sum = '%4s' % sum_district_data['week_sum']
- if 'week_average' in sum_district_data:
- week_avg = '%5.1f' % sum_district_data['week_average']
- if 'week_incidence' in sum_district_data:
- week_inc = '%4.1f' % sum_district_data['week_incidence']
- print(date, ' '.join(['%3s' % infections for infections in new_infections]),
- week_sum, week_avg, week_inc)
-
- # Maintain 7-day cycle.
- weekday_count += 1
- if weekday_count != 7:
- continue
- weekday_count = 0
+ # Day data line.
+ new_infections = []
+ for district in sorted_districts:
+ new_infections += [db[district][date]['new_infections']]
+ week_sum = week_avg = week_inc = ''
+ sum_district_data = db[sum_district][date]
+ if 'week_sum' in sum_district_data:
+ week_sum = '%4s' % sum_district_data['week_sum']
+ if 'week_average' in sum_district_data:
+ week_avg = '%5.1f' % sum_district_data['week_average']
+ if 'week_incidence' in sum_district_data:
+ week_inc = '%4.1f' % sum_district_data['week_incidence']
+ print(date, ' '.join(['%3s' % infections
+ for infections in new_infections]),
+ week_sum, week_avg, week_inc)
- # After each 7 days, print summary for individual districts.
- weekly_sums = []
- weekly_avgs = []
- weekly_incs = []
- for district in sorted_districts[:-1]:
- weekly_sums += [db[district][week_start_date]['week_sum']]
- weekly_avgs += [db[district][week_start_date]['week_average']]
- weekly_incs += [db[district][week_start_date]['week_incidence']]
- print()
- print('district stats for week from %s to %s:' % (date, week_start_date))
- print(' '*7, ' '.join(sorted_districts[:-1]))
- print('wsum', ' '.join(['%5.1f' % wsum for wsum in weekly_sums]))
- print('wavg', ' '.join(['%5.1f' % wavg for wavg in weekly_avgs]))
- print('winc', ' '.join(['%5.1f' % winc for winc in weekly_incs]))
- print()
+ # Maintain 7-day cycle.
+ weekday_count += 1
+ if weekday_count != 7:
+ continue
+ weekday_count = 0
+
+ # After each 7 days, print summary for individual districts.
+ weekly_sums = []
+ weekly_avgs = []
+ weekly_incs = []
+ for district in sorted_districts[:-1]:
+ weekly_sums += [db[district][week_start_date]['week_sum']]
+ weekly_avgs += [db[district][week_start_date]['week_average']]
+ weekly_incs += [db[district][week_start_date]['week_incidence']]
+ print()
+ print('district stats for week from %s to %s:' % (date, week_start_date))
+ print(' '*7, ' '.join(sorted_districts[:-1]))
+ print('wsum', ' '.join(['%5.1f' % wsum for wsum in weekly_sums]))
+ print('wavg', ' '.join(['%5.1f' % wavg for wavg in weekly_avgs]))
+ print('winc', ' '.join(['%5.1f' % winc for winc in weekly_incs]))
+ print()