From d074cd51f4378884e977b26490af1c6d1dcc81c8 Mon Sep 17 00:00:00 2001 From: Christian Heller <c.heller@plomlompom.de> Date: Sun, 17 Jan 2016 20:38:57 +0100 Subject: [PATCH] Add content-type check to URL interpreter. --- plomlombot.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/plomlombot.py b/plomlombot.py index 179bc2a..7d915dc 100644 --- a/plomlombot.py +++ b/plomlombot.py @@ -103,6 +103,10 @@ while 1: for i in range(len(matches)): url = matches[i] webpage = urllib.request.urlopen(url) + content_type = webpage.info().get_content_type() + if not content_type in ('text/html', 'text/xml', + 'application/xhtml+xml'): + continue charset = webpage.info().get_content_charset() content = webpage.read().decode(charset) title = str(content).split('<title>')[1].split('</title>')[0] -- 2.30.2