X-Git-Url: https://plomlompom.com/repos/do_day?a=blobdiff_plain;ds=sidebyside;f=enhance_table.py;h=8d0fd335ec2c62e077bb3b34fcd47f10920adb30;hb=7c0bea9a26a3e3a5466f1a75d41f4a48d2106ac2;hp=a626329bcd20236477e60b8bd80b93c38274bcaa;hpb=078d5207c9a6d7ad4611df311a5e11a635f6f30f;p=berlin-corona-table

diff --git a/enhance_table.py b/enhance_table.py
index a626329..8d0fd33 100755
--- a/enhance_table.py
+++ b/enhance_table.py
@@ -17,11 +17,65 @@ district_pops = {
   'sum': 3754418,
 }
 
-f = open('daily_infections_table.txt', 'r')
+# Map abbreviations (table column keys) to full names for display.
+translate = {
+  'CW': 'Charlottenburg-Wilmersdorf',
+  'FK': 'Friedrichshain-Kreuzberg',
+  'Li': 'Lichtenberg',
+  'MH': 'Marzahn-Hellersdorf',
+  'Mi': 'Mitte',
+  'Ne': 'Neukölln',
+  'Pa': 'Pankow',
+  'Re': 'Reinickendorf',
+  'Sp': 'Spandau',
+  'SZ': 'Steglitz-Zehlendorf',
+  'TS': 'Tempelhof-Schöneberg',
+  'TK': 'Treptow-Köpenick',
+  'sum': 'all of Berlin',
+  'wsum': 'sum for last 7 days',
+  'wavg': 'per-day average of new infections for last 7 days',
+  'winc': 'incidence (x per 100k inhabitants) of new infections for last 7 days',
+}
+
+# Read infections table path and output type; usage errors go to stderr.
+import sys
+if len(sys.argv) != 3:
+    sys.exit('Expecting infections table file path and output type as only '
+             'arguments.')
+infections_table = sys.argv[1]
+output_type = sys.argv[2]
+
+# Read infections table file lines.
+f = open(infections_table, 'r')
 lines = f.readlines()
 f.close()
 
-# Parse first table file line for the names and order of districts. 
+# Basic input validation; line numbers count data lines (header excluded).
+import datetime
+header_elements = lines[0].split() if lines else []
+if set(header_elements) != district_pops.keys() or \
+       len(header_elements) != len(district_pops.keys()):
+    raise Exception('infections table: invalid header')
+line_count = 0
+for line in lines[1:]:
+    line_count += 1
+    fields = line.split()
+    if len(header_elements) != len(fields) - 1:
+        raise Exception('infections table: wrong number of elements on '
+                        'line %s' % line_count)
+    try:
+        datetime.date.fromisoformat(fields[0])
+    except ValueError:
+        raise Exception('infections table: bad ISO date on line %s'
+                        % line_count)
+    for field in fields[1:]:
+        try:
+            int(field)
+        except ValueError:
+            raise Exception('infections table: bad value on line %s'
+                            % line_count)
+
+# Parse first table file line for the names and order of districts.
 db = {}
 sorted_districts = []
 for header in lines[0].split():
@@ -40,11 +94,14 @@ for line in lines[1:]:
         db[district][date] = {'new_infections': int(district_data)}
 sorted_dates.sort()
 
+# In LaGeSo's data, the last "district" is actually the sum of all districts /
+# the whole of Berlin.
+#
 # Fail on any day where the "sum" district's new infections are not the proper
 # sum of the individual districts new infections.  Yes, sometimes Lageso sends
 # data that is troubled in this way.  It will then have to be fixed manually in
 # the table file, since we should have a human look at what mistake was
-# probably made. 
+# probably made.
 for date in sorted_dates:
     sum_district = sorted_districts[-1]
     day_sum = 0
@@ -60,9 +117,9 @@ for i in range(len(sorted_dates)):
     if i < 6:
         continue
     date = sorted_dates[i]
-    week_dates = [] 
+    week_dates = []
     for j in range(7):
-        week_dates += [sorted_dates[i - j]]        
+        week_dates += [sorted_dates[i - j]]
     for district in sorted_districts:
         district_pop = district_pops[district]
         week_sum = 0
@@ -72,79 +129,130 @@ for i in range(len(sorted_dates)):
         db[district][date]['week_average'] = week_sum / 7
         db[district][date]['week_incidence'] = (week_sum / district_pop) * 100000
 
-# Explain what this is.
-intro = """
-Table of Berlin's Corona infection number development by districts, daily
-updated around 9pm.
+# Optimized for web browser viewing; declares UTF-8 (assumes UTF-8 stdout).
+if output_type == 'html':
+    print("""<!DOCTYPE html>
+<html>
+<head>
+<meta charset="utf-8">
+<style>
+table, tr, th, td { border: 1px solid black; }
+.day_row:nth-child(7n+2) { background-color: yellow; }
+.district_name { writing-mode: vertical-rl; transform: rotate(180deg); }
+</style>
+<title>Table of Berlin's Corona infection number development by districts</title>
+</head>
+<h1>Table of Berlin's Corona infection number development by districts</h1>
+<p>Updated daily at 9pm. <a href="https://plomlompom.com/repos/?p=berlin-corona-table">Source code</a>. <a href="berlin_corona.txt">Text view optimized for terminal curl</a>.</p>
+<table>
+<tr>
+<th>date</th>""")
+    sorted_dates.reverse()
+    sum_district = sorted_districts[-1]
+    for district in sorted_districts:
+        long_form = translate[district]
+        if sum_district == district:
+            print('<th>%s</th>' % long_form)
+        else:
+            print('<th class="district_name">%s</th>' % long_form)
+    print('</tr>')
+    long_wsum = translate['wsum']
+    long_wavg = translate['wavg']
+    long_winc = translate['winc']
+    for date in sorted_dates:
+        print('<tr class="day_row">')
+        print('<td>%s</td>' % date)
+        for district in sorted_districts:
+            district_data = db[district][date]
+            week_sum = week_avg = week_inc = '(not enough data)'
+            new_infections = district_data['new_infections']
+            if 'week_sum' in district_data:
+                week_sum = '%s' % district_data['week_sum']
+            if 'week_average' in district_data:
+                week_avg = '%.1f' % district_data['week_average']
+            if 'week_incidence' in district_data:
+                week_inc = '%.1f' % district_data['week_incidence']
+            print('<td>')
+            print(new_infections)
+            if district != sum_district:
+                print('<details><summary></summary>')
+            print('<table>')
+            print('<tr><th>%s</th><td>%s</td></tr>' % (long_wsum, week_sum))
+            print('<tr><th>%s</th><td>%s</td></tr>' % (long_wavg, week_avg))
+            print('<tr><th>%s</th><td>%s</td></tr>' % (long_winc, week_inc))
+            print('</table>')
+            if district != sum_district:
+                print('</details>')
+            print('</td>')
+        print('</tr>')
+    print('</table>\n</html>')
 
-Abbrevations/explanations:
-CW: Charlottenburg-Wilmersdorf
-FK: Friedrichshain-Kreuzberg
-Li: Lichtenberg
-MH: Marzahn-Hellersdorf
-Mi: Mitte
-Ne: Neukölln
-Pa: Pankow
-Re: Reinickendorf
-Sp: Spandau
-SZ: Steglitz-Zehlendorf
-TS: Tempelhof-Schöneberg
-TK: Treptow-Köpenick
-sum: sum for all the districts
-wsum: sum for last 7 days
-wavg: per-day average of new infections for last 7 days
-winc: incidence (x per 100k inhabitants) of new infections for last 7 days
+# Optimized for in-terminal curl.
+elif output_type == 'txt':
 
+    # Explain what this is.
+    intro = \
+"""Table of Berlin's Corona infection number development by districts.
+Updated daily at 9pm.
+
+Abbreviations/explanations:
+"""
+    for k in translate:
+        intro += "%s: %s\n" % (k, translate[k])
+    intro += """
 Source code: https://plomlompom.com/repos/?p=berlin-corona-table
+
+HTML view: https://plomlompom.com/berlin_corona.html
 """
-print(intro)
+    print(intro)
 
-# Output table of enhanced daily infection data, newest on top, separated into
-# 7-day units.
-sorted_dates.reverse()
-weekday_count = 0
-for date in sorted_dates:
+    # Output table of enhanced daily infection data, newest on top,
+    # separated into 7-day units.
+    sorted_dates.reverse()
+    weekday_count = 0
+    sum_district = sorted_districts[-1]
+    for date in sorted_dates:
 
-    # Week table header.
-    if weekday_count == 0:
-        print(' '*11, '  '.join(sorted_districts[:-1]),
-              sorted_districts[-1], 'wsum', ' wavg', 'winc')
-        week_start_date = date 
+        # Week table header.
+        if weekday_count == 0:
+            print(' '*11, '  '.join(sorted_districts[:-1]),
+                  sorted_districts[-1], 'wsum', ' wavg', 'winc')
+            week_start_date = date
 
-    # Day data line. 
-    new_infections = []
-    for district in sorted_districts:
-        new_infections += [db[district][date]['new_infections']]
-    week_sum = week_avg = week_inc = ''
-    sum_district = sorted_districts[-1]
-    sum_district_data = db[sum_district][date]
-    if 'week_sum' in sum_district_data:
-        week_sum = '%4s' % sum_district_data['week_sum'] 
-    if 'week_average' in sum_district_data:
-        week_avg = '%5.1f' % sum_district_data['week_average'] 
-    if 'week_incidence' in sum_district_data:
-        week_inc = '%4.1f' % sum_district_data['week_incidence'] 
-    print(date, ' '.join(['%3s' % infections for infections in new_infections]),
-          week_sum, week_avg, week_inc) 
-
-    # Maintain 7-day cycle.
-    weekday_count += 1
-    if weekday_count != 7:
-        continue
-    weekday_count = 0
+        # Day data line: per-district counts, then sum district's week stats.
+        new_infections = []
+        for district in sorted_districts:
+            new_infections += [db[district][date]['new_infections']]
+        week_sum = week_avg = week_inc = ''  # blank until 7 days of data exist
+        sum_district_data = db[sum_district][date]
+        if 'week_sum' in sum_district_data:
+            week_sum = '%4s' % sum_district_data['week_sum']
+        if 'week_average' in sum_district_data:
+            week_avg = '%5.1f' % sum_district_data['week_average']
+        if 'week_incidence' in sum_district_data:
+            week_inc = '%4.1f' % sum_district_data['week_incidence']
+        print(date, ' '.join(['%3s' % infections
+                              for infections in new_infections]),
+              week_sum, week_avg, week_inc)
 
-    # After each 7 days, print summary for individual districts.
-    weekly_sums = []
-    weekly_avgs = []
-    weekly_incs = []
-    for district in sorted_districts[:-1]:
-        weekly_sums += [db[district][week_start_date]['week_sum']]
-        weekly_avgs += [db[district][week_start_date]['week_average']]
-        weekly_incs += [db[district][week_start_date]['week_incidence']]
-    print()
-    print('district stats for week from %s to %s:' % (date, week_start_date))
-    print(' '*7, '    '.join(sorted_districts[:-1]))
-    print('wsum', ' '.join(['%5.1f' % wsum for wsum in weekly_sums]))
-    print('wavg', ' '.join(['%5.1f' % wavg for wavg in weekly_avgs]))
-    print('winc', ' '.join(['%5.1f' % winc for winc in weekly_incs]))
-    print()
+        # Maintain 7-day cycle: only every 7th printed day continues below.
+        weekday_count += 1
+        if weekday_count != 7:
+            continue
+        weekday_count = 0
+
+        # After each 7 days, print district stats as of the block's newest day.
+        weekly_sums = []
+        weekly_avgs = []
+        weekly_incs = []
+        for district in sorted_districts[:-1]:
+            weekly_sums += [db[district][week_start_date]['week_sum']]
+            weekly_avgs += [db[district][week_start_date]['week_average']]
+            weekly_incs += [db[district][week_start_date]['week_incidence']]
+        print()
+        print('district stats for week from %s to %s:' % (date, week_start_date))
+        print(' '*7, '    '.join(sorted_districts[:-1]))
+        print('wsum', ' '.join(['%5.1f' % wsum for wsum in weekly_sums]))
+        print('wavg', ' '.join(['%5.1f' % wavg for wavg in weekly_avgs]))
+        print('winc', ' '.join(['%5.1f' % winc for winc in weekly_incs]))
+        print()