class HTMLParser(html.parser.HTMLParser):
    """Collect the text content of the first non-empty element named *tag*.

    The document is parsed right away in the constructor; afterwards the
    gathered text is available in ``self.data``.  ``self.data`` stays ""
    when no matching element containing text was found.
    """

    def __init__(self, html, tag):
        """Parse the string *html* and gather the text inside element *tag*.

        NOTE: the parameter name ``html`` shadows the ``html`` module inside
        this method only; it is kept so existing callers keep working.
        """
        super().__init__()
        self._tag_to_check = tag
        # Name of the element currently being captured, "" when outside it.
        self._tag = ""
        self.data = ""
        self.feed(html)
        # Flush text the parser still buffers (e.g. document ends without
        # another tag), so it is not silently lost.
        self.close()

    def handle_starttag(self, tag, attrs):
        """Start capturing at the watched element if nothing was found yet."""
        # The `self.data == ""` guard keeps later elements of the same name
        # from altering an already captured result.
        if self.data == "" and tag == self._tag_to_check:
            self._tag = tag

    def handle_endtag(self, tag):
        """Stop capturing once the watched element is closed."""
        # Only the watched element's own end tag ends the capture; end tags
        # of elements nested inside it must not cut the text short.
        if tag == self._tag_to_check:
            self._tag = ""

    def handle_data(self, data):
        """Append a text chunk while inside the watched element."""
        # The parser may deliver an element's text in several chunks (split
        # by nested tags or comments), so accumulate instead of overwriting.
        if self._tag != "":
            self.data += data
def init_session(server, port, timeout, nickname, username, channel):
    """Connect to *server*:*port* and register on IRC, then join *channel*.

    Announces the connection attempt on stdout, creates the IO object with
    the given *timeout*, sends the NICK, USER and JOIN lines in that order
    and returns the IO object for further use.
    """
    print("CONNECTING TO " + server)
    connection = IO(server, port, timeout)
    # (prefix, argument, suffix) triples, assembled lazily right before each
    # send so the lines go out one by one in registration order.
    handshake = (
        ("NICK ", nickname, ""),
        ("USER ", username, " 0 * : "),
        ("JOIN ", channel, ""),
    )
    for prefix, argument, suffix in handshake:
        connection.send_line(prefix + argument + suffix)
    return connection
def parse_command_line_arguments():
    """Parse the bot's command line (``sys.argv``) and return the options.

    Returns an ``argparse.Namespace`` with the attributes ``server``,
    ``port`` (int), ``timeout`` (int), ``username``, ``nickname`` and the
    mandatory positional ``CHANNEL``.  Options not given fall back to the
    module-level defaults SERVER, PORT, TIMEOUT, USERNAME and NICKNAME.
    Unrecognized arguments are ignored rather than treated as errors.
    """
    parser = argparse.ArgumentParser()
    # Short and long flag must be passed as separate option strings; a single
    # string such as "-s, --server" registers one literal option of that
    # name, which leaves the long form unusable.
    parser.add_argument("-s", "--server", action="store", dest="server",
                        default=SERVER,
                        help="server or server net to connect to (default: "
                        + SERVER + ")")
    parser.add_argument("-p", "--port", action="store", dest="port", type=int,
                        default=PORT, help="port to connect to (default : "
                        + str(PORT) + ")")
    parser.add_argument("-t", "--timeout", action="store", dest="timeout",
                        type=int, default=TIMEOUT,
                        help="timeout in seconds after which to attempt " +
                        "reconnect (default: " + str(TIMEOUT) + ")")
    parser.add_argument("-u", "--username", action="store", dest="username",
                        default=USERNAME, help="username to use (default: "
                        + USERNAME + ")")
    parser.add_argument("-n", "--nickname", action="store", dest="nickname",
                        default=NICKNAME, help="nickname to use (default: "
                        + NICKNAME + ")")
    parser.add_argument("CHANNEL", action="store", help="channel to join")
    # parse_known_args() is kept so stray extra arguments stay non-fatal.
    opts, _unknown = parser.parse_known_args()
    return opts