Remove redundant whitespace.

[berlin-corona-table] / scrape.py
diff --git a/scrape.py b/scrape.py

index 7c7196c93eef2cde7adc6a1586b1c42d7c394443..e449b998b2821f4fe1fe9539fc1fbf28b2898b9c 100755 (executable)
--- a/scrape.py
+++ b/scrape.py
@@ -104,12 +104,10 @@ fixes = {
     # compromise to keep as many surrounding numbers stable as possible.
     datetime.datetime(2020, 3, 26): {
         'SZ': {
-           'growth': 12,
-           'total': 132
+           'growth': 12
         },
         'sum': {
-           'growth': 286,
-           'total': 1931
+           'growth': 286
         }
     },
     # Here the official total is 220, while the summation of district
@@ -121,7 +119,7 @@ fixes = {
     },
  }
  
-# Scan navigation bar for maximum pagination value. 
+# Scan navigation bar for maximum pagination value.
  url = url_prefix + pm_dir
  with urllib.request.urlopen(url) as response:
     html = response.read()
@@ -147,7 +145,7 @@ for i in range(max_page):
              continue
          day_urls += [link['href']]
  
-# Collect infection data. 
+# Collect infection data.
  data = {}
  first_run = True
  districts_sorted = []
@@ -204,8 +202,10 @@ for date in dates_sorted:
      for district in [d for d in districts_sorted if not d=='sum']:
          prev_date = date - datetime.timedelta(days=1)
          if prev_date not in dates_sorted:
-            # TODO: ensure dates until end of list are continuous
-            continue
+           if prev_date >= date_limit:
+              raise Exception('Dates not contiguous: %s missing' % prev_date)  # interpolate the date; a bare second argument is never formatted into the message
+           else:
+              continue
          prev_total = data[date - datetime.timedelta(days=1)][district]['total']
          cur_total = data[date][district]['total']
          if cur_total - data[date][district]['growth'] != prev_total: