home · contact · privacy
Add script to output enhanced table data.
author Christian Heller <c.heller@plomlompom.de>
Tue, 30 Jun 2020 21:16:57 +0000 (23:16 +0200)
committer Christian Heller <c.heller@plomlompom.de>
Tue, 30 Jun 2020 21:16:57 +0000 (23:16 +0200)
enhance_table.py [new file with mode: 0755]

diff --git a/enhance_table.py b/enhance_table.py
new file mode 100755 (executable)
index 0000000..3860ea6
--- /dev/null
@@ -0,0 +1,123 @@
+#!/usr/bin/env python3
+
+# District population numbers as per Wikipedia.
+district_pops = {
+  'CW': 342332,
+  'FK': 289762,
+  'Li': 291452,
+  'MH': 268548,
+  'Mi': 384172,
+  'Ne': 329691,
+  'Pa': 407765,
+  'Re': 265225,
+  'Sp': 243977,
+  'SZ': 308697,
+  'TS': 351644,
+  'TK': 271153,
+  'sum': 3754418,
+}
+
+f = open('daily_infections_table.txt', 'r')
+lines = f.readlines()
+f.close()
+
+# Parse first table file line for the names and order of districts. 
+db = {}
+sorted_districts = []
+for header in lines[0].split():
+    sorted_districts += [header]
+    db[header] = {}
+
+# Seed DB with daily new infections data per district, per date.
+sorted_dates = []
+for line in lines[1:]:
+    fields = line.split()
+    date = fields[0]
+    sorted_dates += [date]
+    for i in range(len(sorted_districts)):
+        district = sorted_districts[i]
+        district_data = fields[i + 1]
+        db[district][date] = {'new_infections': int(district_data)}
+sorted_dates.sort()
+
+# Fail on any day where the "sum" district's new infections are not the proper
+# sum of the individual districts' new infections.  Yes, sometimes Lageso sends
+# data that is troubled in this way.  It will then have to be fixed manually in
+# the table file, since we should have a human look at what mistake was
+# probably made.
+for date in sorted_dates:
+    sum_district = sorted_districts[-1]
+    day_sum = 0
+    for district in sorted_districts[:-1]:
+        day_sum += db[district][date]['new_infections']
+    if day_sum != db[sum_district][date]['new_infections']:
+        raise Exception('Questionable district infection sum in %s' % date)
+
+# Enhance DB with each day's trailing 7-day sum, average, and incidence (per
+# 100,000 inhabitants).  Ignore days that have fewer than 6 predecessors (we
+# can only know a weekly average if we have a whole week of data).
+for i in range(len(sorted_dates)):
+    if i < 6:
+        continue
+    date = sorted_dates[i]
+    week_dates = [] 
+    for j in range(7):
+        week_dates += [sorted_dates[i - j]]        
+    for district in sorted_districts:
+        district_pop = district_pops[district]
+        week_sum = 0
+        for week_date in week_dates:
+            week_sum += db[district][week_date]['new_infections']
+        db[district][date]['week_sum'] = week_sum
+        db[district][date]['week_average'] = week_sum / 7
+        db[district][date]['week_incidence'] = (week_sum / district_pop) * 100000
+
+# Output table of enhanced daily infection data, newest on top, separated into
+# 7-day units.
+sorted_dates.reverse()
+weekday_count = 0
+for date in sorted_dates:
+
+    # Week table header.
+    if weekday_count == 0:
+        print(' '*11, '  '.join(sorted_districts[:-1]),
+              sorted_districts[-1], 'wsum', ' wavg', 'winc')
+        week_start_date = date 
+
+    # Day data line. 
+    new_infections = []
+    for district in sorted_districts:
+        new_infections += [db[district][date]['new_infections']]
+    week_sum = week_avg = week_inc = ''
+    sum_district = sorted_districts[-1]
+    sum_district_data = db[sum_district][date]
+    if 'week_sum' in sum_district_data:
+        week_sum = '%4s' % sum_district_data['week_sum'] 
+    if 'week_average' in sum_district_data:
+        week_avg = '%5.1f' % sum_district_data['week_average'] 
+    if 'week_incidence' in sum_district_data:
+        week_inc = '%4.1f' % sum_district_data['week_incidence'] 
+    print(date, ' '.join(['%3s' % infections for infections in new_infections]),
+          week_sum, week_avg, week_inc) 
+
+    # Maintain 7-day cycle.
+    weekday_count += 1
+    if weekday_count != 7:
+        continue
+    weekday_count = 0
+
+    # After every 7 days, print a summary for the individual districts.
+    weekly_sums = []
+    weekly_avgs = []
+    weekly_incs = []
+    for district in sorted_districts[:-1]:
+        weekly_sums += [db[district][week_start_date]['week_sum']]
+        weekly_avgs += [db[district][week_start_date]['week_average']]
+        weekly_incs += [db[district][week_start_date]['week_incidence']]
+    print()
+    print('district stats for week from %s to %s:' % (date, week_start_date))
+    print(' '*7, '    '.join(sorted_districts[:-1]))
+    print('wsum', ' '.join(['%5.1f' % wsum for wsum in weekly_sums]))
+    print('wavg', ' '.join(['%5.1f' % wavg for wavg in weekly_avgs]))
+    print('winc', ' '.join(['%5.1f' % winc for winc in weekly_incs]))
+    print()