From: Christian Heller Date: Mon, 18 Apr 2016 20:45:37 +0000 (+0200) Subject: Handle URLs for too large websites / files. X-Git-Url: https://plomlompom.com/repos/%7B%7Bdb.prefix%7D%7D/%7B%7Bprefix%7D%7D/foo.html?a=commitdiff_plain;h=42c69ab892b5a9d2dc2c68a059e7b506a00197a0;p=plomlombot-irc.git Handle URLs for too large websites / files. --- diff --git a/plomlombot.py b/plomlombot.py index 7224b29..aa7061c 100755 --- a/plomlombot.py +++ b/plomlombot.py @@ -339,17 +339,22 @@ def handle_url(url, notice, show_url=False): return True try: - r = requests.get(url, timeout=15) + r = requests.get(url, timeout=5, stream=True) + r.raw.decode_content = True + text = r.raw.read(10000000+1) + if len(text) > 10000000: + raise ValueError('Too large a response') except (requests.exceptions.TooManyRedirects, requests.exceptions.ConnectionError, requests.exceptions.InvalidURL, UnicodeError, + ValueError, requests.exceptions.InvalidSchema) as error: notice("TROUBLE FOLLOWING URL: " + str(error)) return if mobile_twitter_hack(url): return - title = bs4.BeautifulSoup(r.text, "html5lib").title + title = bs4.BeautifulSoup(text, "html5lib").title if title and title.string: prefix = "PAGE TITLE: " if show_url: