X-Git-Url: https://plomlompom.com/repos/foo.html?a=blobdiff_plain;ds=sidebyside;f=scrape.py;fp=scrape.py;h=f11f315f5e73e8f3d6b5b3b98c0f7fbfac63267a;hb=88e62da91d09edd688abd38a5924b4a712eecbf5;hp=d95cfedfe44d2ee60f5f32f265b24c18efa5b6ea;hpb=a2f73f130d45247e0097278f0c1388583bf18986;p=berlin-corona-table
diff --git a/scrape.py b/scrape.py
index d95cfed..f11f315 100755
--- a/scrape.py
+++ b/scrape.py
@@ -88,23 +88,6 @@ data = {
'TK': {'growth': 14, 'total': 183},
'sum': {'growth': 263, 'total': 3486}
},
- # This one has no press release but can be reconstructed from
- # the neighbour ones.
- datetime.datetime(2020, 3, 13): {
- 'CW': {'growth': 16, 'total': 47},
- 'FK': {'growth': 8, 'total': 22},
- 'Li': {'growth': 2, 'total': 8},
- 'MH': {'growth': 1, 'total': 4},
- 'Mi': {'growth': 9, 'total': 29},
- 'Ne': {'growth': 6, 'total': 16},
- 'Pa': {'growth': 11, 'total': 26},
- 'Re': {'growth': 0, 'total': 11},
- 'Sp': {'growth': 1, 'total': 9},
- 'SZ': {'growth': 0, 'total': 20},
- 'TS': {'growth': 1, 'total': 17},
- 'TK': {'growth': 3, 'total': 7},
- 'sum': {'growth': 58, 'total': 216}
- },
# Here the growth numbers needed to be reconstructed.
datetime.datetime(2020, 3, 10): {
'CW': {'growth': 2, 'total': 15},
@@ -314,7 +297,8 @@ for path in day_urls:
if date <= datetime.datetime(2020, 3, 13):
date = date - datetime.timedelta(days=1)
table = soup.find('table')
- if table is None and date in data:
+ # For 13th of March we lack a press release.
+ if table is None and (date in data or date == datetime.datetime(2020, 3, 13)):
continue
data[date] = {}
for tr in [tr for tr in table.children if type(tr) == bs4.element.Tag][1:]:
@@ -334,6 +318,21 @@ for path in day_urls:
total = int(total_str.replace('.', ''))
data[date][district_short] = {'growth': growth, 'total': total}
first_run = False
+
+# Reconstruct data for 13th of March, for which no press release exists:
+# total = next day's total - next day's growth; growth = total - prior total.
+day_target = datetime.datetime(2020, 3, 13)
+day_after = day_target + datetime.timedelta(days=1)
+day_before = day_target - datetime.timedelta(days=1)
+data[day_target] = {}
+for district in districts_sorted:
+    total_after = data[day_after][district]['total']
+    growth_after = data[day_after][district]['growth']
+    total_target = total_after - growth_after
+    total_before = data[day_before][district]['total']
+    data[day_target][district] = {'growth': total_target - total_before,
+                                  'total': total_target}
+
dates_sorted = list(data.keys())
dates_sorted.sort()
dates_sorted.reverse()