'sum': {'Summe', 'Berlin'},
}
-# Here only image files are available for the table data.
-unparsable_graphics_fallback = {
+# some pre-filled values
+data = {
+ # For these, only image files are available for the table data.
datetime.datetime(2020, 7, 2): {
'CW': {'growth': 4, 'total': 851},
'FK': {'growth': 10, 'total': 681},
'TS': {'growth': 41, 'total': 362},
'TK': {'growth': 14, 'total': 183},
'sum': {'growth': 263, 'total': 3486}
- }
+ },
+ # This day has no press release, but its numbers can be reconstructed
+ # from the neighbouring days.
+ datetime.datetime(2020, 3, 13): {
+ 'CW': {'growth': 16, 'total': 47},
+ 'FK': {'growth': 8, 'total': 22},
+ 'Li': {'growth': 2, 'total': 8},
+ 'MH': {'growth': 1, 'total': 4},
+ 'Mi': {'growth': 9, 'total': 29},
+ 'Ne': {'growth': 6, 'total': 16},
+ 'Pa': {'growth': 11, 'total': 26},
+ 'Re': {'growth': 0, 'total': 11},
+ 'Sp': {'growth': 1, 'total': 9},
+ 'SZ': {'growth': 0, 'total': 20},
+ 'TS': {'growth': 1, 'total': 17},
+ 'TK': {'growth': 3, 'total': 7},
+ 'sum': {'growth': 58, 'total': 216}
+ }
}
fixes = {
# Here the official total is 215, while the summation of district
day_urls += [link['href']]
# Collect infection data.
-data = {}
first_run = True
districts_sorted = []
# TODO: Push limit further back (might need more data fixes for that).
-date_limit = datetime.datetime(2020, 3, 16)
+date_limit = datetime.datetime(2020, 3, 12)
for path in day_urls:
url = url_prefix + path
with urllib.request.urlopen(url) as response:
date = datetime.datetime.strptime(date_formatted , '%d.%m.%Y')
if date_limit > date:
break
- if date in data:
- raise Exception('Double date %s', date)
- #date -= datetime.timedelta(days=1)
- data[date] = {}
+ # On that day, two press releases were published: one for that day and one for the previous day.
+ if date == datetime.datetime(2020, 3, 15) and date in data:
+ date = datetime.datetime(2020, 3, 14)
+ # Press releases dated 2020-03-13 or earlier describe the numbers of the previous day.
+ if date <= datetime.datetime(2020, 3, 13):
+ date = date - datetime.timedelta(days=1)
table = soup.find('table')
- if table is None:
- data[date] = unparsable_graphics_fallback[date]
+ if table is None and date in data:
continue
+ data[date] = {}
for tr in [tr for tr in table.children if type(tr) == bs4.element.Tag][1:]:
printable_tds = []
for td in [td for td in tr.children if type(td) == bs4.element.Tag][:2]:
raise Exception('Dates not contiguous: %s missing', prev_date)
else:
continue
- prev_total = data[date - datetime.timedelta(days=1)][district]['total']
+ prev_total = data[prev_date][district]['total']
cur_total = data[date][district]['total']
if cur_total - data[date][district]['growth'] != prev_total:
raise Exception('Questionable district infection total in %s/%s' % (district, date))