X-Git-Url: https://plomlompom.com/repos/?p=plomlombot-irc.git;a=blobdiff_plain;f=plomlombot.py;h=e4a5be35aabd091204209eff379a542f206c7e4e;hp=cf6bae90864be27ec6a750876b72070e36287774;hb=61a573d1cb2165c5aa7a00b999b9b5bda81caaaa;hpb=81f2ce0b5a23a87f013f225193de67e85305a0fa diff --git a/plomlombot.py b/plomlombot.py index cf6bae9..e4a5be3 100644 --- a/plomlombot.py +++ b/plomlombot.py @@ -75,11 +75,14 @@ def url_check(msg): matches = re.findall("(https?://[^\s]+)", msg) for i in range(len(matches)): url = matches[i] - webpage = urllib.request.urlopen(url) + webpage = urllib.request.urlopen(url, timeout=15) content_type = webpage.info().get_content_type() charset = webpage.info().get_content_charset() - if not charset or not content_type in ('text/html', 'text/xml', + if not charset: + charset="utf-8" + if not content_type in ('text/html', 'text/xml', 'application/xhtml+xml'): + print("TROUBLE INTERPRETING URL: bad content_type " + content_type) continue content = webpage.read().decode(charset) title = str(content).split('<title>')[1].split('</title>')[0]