X-Git-Url: https://plomlompom.com/repos/new_day?a=blobdiff_plain;f=plomlombot.py;h=e4a5be35aabd091204209eff379a542f206c7e4e;hb=61a573d1cb2165c5aa7a00b999b9b5bda81caaaa;hp=7fa9194af8236e27fd7d547843fc1a1e3577dc8b;hpb=19b5218056b02ed8454cdc095560856bdf48b8b9;p=plomlombot-irc.git diff --git a/plomlombot.py b/plomlombot.py index 7fa9194..e4a5be3 100644 --- a/plomlombot.py +++ b/plomlombot.py @@ -71,6 +71,24 @@ class IO: line) return line +def url_check(msg): + matches = re.findall("(https?://[^\s]+)", msg) + for i in range(len(matches)): + url = matches[i] + webpage = urllib.request.urlopen(url, timeout=15) + content_type = webpage.info().get_content_type() + charset = webpage.info().get_content_charset() + if not charset: + charset="utf-8" + if not content_type in ('text/html', 'text/xml', + 'application/xhtml+xml'): + print("TROUBLE INTERPRETING URL: bad content_type " + content_type) + continue + content = webpage.read().decode(charset) + title = str(content).split('