From 61a573d1cb2165c5aa7a00b999b9b5bda81caaaa Mon Sep 17 00:00:00 2001 From: Christian Heller Date: Sun, 17 Jan 2016 21:21:17 +0100 Subject: [PATCH] Assume default charset utf-8 when no charset found in URL interpreter. --- plomlombot.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/plomlombot.py b/plomlombot.py index 36228f3..e4a5be3 100644 --- a/plomlombot.py +++ b/plomlombot.py @@ -78,8 +78,11 @@ def url_check(msg): webpage = urllib.request.urlopen(url, timeout=15) content_type = webpage.info().get_content_type() charset = webpage.info().get_content_charset() - if not charset or not content_type in ('text/html', 'text/xml', + if not charset: + charset="utf-8" + if not content_type in ('text/html', 'text/xml', 'application/xhtml+xml'): + print("TROUBLE INTERPRETING URL: bad content_type " + content_type) continue content = webpage.read().decode(charset) title = str(content).split('<title>')[1].split('</title>')[0] -- 2.30.2