home · contact · privacy
Add content-type check to URL interpreter.
author: Christian Heller <c.heller@plomlompom.de>
Sun, 17 Jan 2016 19:38:57 +0000 (20:38 +0100)
committer: Christian Heller <c.heller@plomlompom.de>
Sun, 17 Jan 2016 19:38:57 +0000 (20:38 +0100)
plomlombot.py

index 179bc2a2221d90da84580f57898aa3b704fca0f5..7d915dc2f80c79157119089aa2e0dd6e9566d16a 100644 (file)
@@ -103,6 +103,10 @@ while 1:
             for i in range(len(matches)):
                 url = matches[i]
                 webpage = urllib.request.urlopen(url)
+                content_type = webpage.info().get_content_type()
+                if not content_type in ('text/html', 'text/xml',
+                    'application/xhtml+xml'):
+                    continue
                 charset = webpage.info().get_content_charset()
                 content = webpage.read().decode(charset)
                 title = str(content).split('<title>')[1].split('</title>')[0]