X-Git-Url: https://plomlompom.com/repos/?p=berlin-corona-table;a=blobdiff_plain;f=scrape.py;h=e449b998b2821f4fe1fe9539fc1fbf28b2898b9c;hp=7c7196c93eef2cde7adc6a1586b1c42d7c394443;hb=4289e3ee6177babe630e973d5a2d5049538ab96e;hpb=d08b45bff0b5147825c60fa8f17b2ec384521963

diff --git a/scrape.py b/scrape.py
index 7c7196c..e449b99 100755
--- a/scrape.py
+++ b/scrape.py
@@ -104,12 +104,10 @@ fixes = {
    # compromise to keep as many surrounding numbers stable as possible.
    datetime.datetime(2020, 3, 26): {
        'SZ': {
-           'growth': 12,
-           'total': 132
+           'growth': 12
        },
        'sum': {
-           'growth': 286,
-           'total': 1931
+           'growth': 286
        }
    },
    # Here the official total is 220, while the summation of district
@@ -121,7 +119,7 @@ fixes = {
    },
 }
 
-# Scan navigation bar for maximum pagination value. 
+# Scan navigation bar for maximum pagination value.
 url = url_prefix + pm_dir
 with urllib.request.urlopen(url) as response:
    html = response.read()
@@ -147,7 +145,7 @@ for i in range(max_page):
             continue
         day_urls += [link['href']]
 
-# Collect infection data. 
+# Collect infection data.
 data = {}
 first_run = True
 districts_sorted = []
@@ -204,8 +202,10 @@ for date in dates_sorted:
     for district in [d for d in districts_sorted if not d=='sum']:
         prev_date = date - datetime.timedelta(days=1)
         if prev_date not in dates_sorted:
-            # TODO: ensure dates until end of list are continuous
-            continue
+           if prev_date >= date_limit:  # gap at/after date_limit: abort
+              raise Exception('Dates not contiguous: %s missing' % prev_date)
+           else:  # gap lies before date_limit: nothing to compare, skip
+              continue
         prev_total = data[date - datetime.timedelta(days=1)][district]['total']
         cur_total = data[date][district]['total']
         if cur_total - data[date][district]['growth'] != prev_total: