From 42c69ab892b5a9d2dc2c68a059e7b506a00197a0 Mon Sep 17 00:00:00 2001 From: Christian Heller Date: Mon, 18 Apr 2016 22:45:37 +0200 Subject: [PATCH] Handle URLs for too large websites / files. --- plomlombot.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/plomlombot.py b/plomlombot.py index 7224b29..aa7061c 100755 --- a/plomlombot.py +++ b/plomlombot.py @@ -339,17 +339,22 @@ def handle_url(url, notice, show_url=False): return True try: - r = requests.get(url, timeout=15) + r = requests.get(url, timeout=5, stream=True) + r.raw.decode_content = True + text = r.raw.read(10000000+1) + if len(text) > 10000000: + raise ValueError('Too large a response') except (requests.exceptions.TooManyRedirects, requests.exceptions.ConnectionError, requests.exceptions.InvalidURL, UnicodeError, + ValueError, requests.exceptions.InvalidSchema) as error: notice("TROUBLE FOLLOWING URL: " + str(error)) return if mobile_twitter_hack(url): return - title = bs4.BeautifulSoup(r.text, "html5lib").title + title = bs4.BeautifulSoup(text, "html5lib").title if title and title.string: prefix = "PAGE TITLE: " if show_url: -- 2.30.2