From: Christian Heller Date: Sun, 17 Jan 2016 20:21:17 +0000 (+0100) Subject: Assume default charset utf-8 when no charset found in URL interpreter. X-Git-Url: https://plomlompom.com/repos/%7B%7Bprefix%7D%7D/foo.html?a=commitdiff_plain;h=61a573d1cb2165c5aa7a00b999b9b5bda81caaaa;p=plomlombot-irc.git Assume default charset utf-8 when no charset found in URL interpreter. --- diff --git a/plomlombot.py b/plomlombot.py index 36228f3..e4a5be3 100644 --- a/plomlombot.py +++ b/plomlombot.py @@ -78,8 +78,11 @@ def url_check(msg): webpage = urllib.request.urlopen(url, timeout=15) content_type = webpage.info().get_content_type() charset = webpage.info().get_content_charset() - if not charset or not content_type in ('text/html', 'text/xml', + if not charset: + charset="utf-8" + if not content_type in ('text/html', 'text/xml', 'application/xhtml+xml'): + print("TROUBLE INTERPRETING URL: bad content_type " + content_type) continue content = webpage.read().decode(charset) title = str(content).split('<title>')[1].split('</title>')[0]