URL interpreter: Handle HTTPErrors.

[plomlombot-irc.git] / plomlombot.py
diff --git a/plomlombot.py b/plomlombot.py

index cf6bae90864be27ec6a750876b72070e36287774..ffb4192ac8f2925b1d6a34684acfd585eec82c6f 100644 (file)
--- a/plomlombot.py
+++ b/plomlombot.py
@@ -75,11 +75,18 @@ def url_check(msg):
      matches = re.findall("(https?://[^\s]+)", msg)
      for i in range(len(matches)):
          url = matches[i]
-        webpage = urllib.request.urlopen(url)
-        content_type = webpage.info().get_content_type()
+        try:
+            webpage = urllib.request.urlopen(url, timeout=15)
+        except urllib.error.HTTPError as error:
+            print("TROUBLE FOLLOWING URL: " + str(error))
+            continue
          charset = webpage.info().get_content_charset()
-        if not charset or not content_type in ('text/html', 'text/xml',
+        if not charset:
+            charset="utf-8"
+        content_type = webpage.info().get_content_type()
+        if not content_type in ('text/html', 'text/xml',
                  'application/xhtml+xml'):
+            print("TROUBLE INTERPRETING URL: bad content_type " + content_type)
              continue
          content = webpage.read().decode(charset)
          title = str(content).split('<title>')[1].split('</title>')[0]