home · contact · privacy
In URL interpreter, assume default charset utf-8 when no charset is found.
[plomlombot-irc.git] / plomlombot.py
index cf6bae90864be27ec6a750876b72070e36287774..e4a5be35aabd091204209eff379a542f206c7e4e 100644 (file)
@@ -75,11 +75,14 @@ def url_check(msg):
     matches = re.findall("(https?://[^\s]+)", msg)
     for i in range(len(matches)):
         url = matches[i]
-        webpage = urllib.request.urlopen(url)
+        webpage = urllib.request.urlopen(url, timeout=15)
         content_type = webpage.info().get_content_type()
         charset = webpage.info().get_content_charset()
-        if not charset or not content_type in ('text/html', 'text/xml',
+        if not charset:
+            charset="utf-8"
+        if not content_type in ('text/html', 'text/xml',
                 'application/xhtml+xml'):
+            print("TROUBLE INTERPRETING URL: bad content_type " + content_type)
             continue
         content = webpage.read().decode(charset)
         title = str(content).split('<title>')[1].split('</title>')[0]