home · contact · privacy
Fix buggy HTMLParser.
[plomlombot-irc.git] / plomlombot.py
1 #!/usr/bin/python3
2
3 import argparse
4 import socket
5 import datetime
6 import select
7 import time
8 import re
9 import urllib.request
10 import http.client
11 import html
12 import html.parser
13
# Connection defaults; each one can be replaced by a command line argument.
SERVER = "irc.freenode.net"  # IRC server to connect to
PORT = 6667  # port to connect to on that server
TIMEOUT = 240  # seconds after which a reconnect is attempted
USERNAME = "plomlombot"  # username to use
NICKNAME = USERNAME  # nickname to use; same as the username by default
20
21
class HTMLParser(html.parser.HTMLParser):
    """Collect the text content of the first non-empty `tag` element.

    The document is parsed immediately on construction; afterwards the
    collected text is available as the `data` attribute ("" if the tag was
    not found or contained no text).
    """

    def __init__(self, html, tag):
        """Parse the markup string `html`, collecting the text inside `tag`.

        Note that the `html` parameter shadows the `html` module inside this
        method only.
        """
        super().__init__()
        self._tag_to_check = tag
        # Nesting depth inside the element being captured; 0 = not capturing.
        self._depth = 0
        self.data = ""
        self.feed(html)

    def handle_starttag(self, tag, attrs):
        """Start capturing at the first matching tag seen while data is empty."""
        if tag != self._tag_to_check:
            return
        if self._depth > 0:
            # Same tag nested inside the captured element: track it so its
            # end tag does not terminate the capture prematurely.
            self._depth += 1
        elif self.data == "":
            self._depth = 1

    def handle_endtag(self, tag):
        """Stop capturing only when the matching end tag is reached."""
        # End tags of other (nested) elements must not end the capture,
        # otherwise text following them would be lost.
        if self._depth > 0 and tag == self._tag_to_check:
            self._depth -= 1

    def handle_data(self, data):
        """Append text found inside the captured element."""
        # Text may arrive in several pieces (e.g. split by nested markup),
        # so accumulate rather than overwrite.
        if self._depth > 0:
            self.data += data
37
38
class ExceptionForRestart(Exception):
    """Raised when the server connection is lost and must be re-established."""
41
42
class IO:
    """Line-oriented connection to an IRC server over a non-blocking socket.

    Attributes:
        timeout: seconds of server silence after which the connection is
            considered dead.
        socket: the underlying socket object.
        line_buffer: complete, decoded lines received but not yet returned.
        last_pong: time.time() of the last moment data arrived.
        servername: first token of the server's first line, minus its
            leading character.
    """

    def __init__(self, server, port, timeout):
        """Connect to (server, port) and read the server's first line.

        Raises:
            ExceptionForRestart: if the server sends nothing within `timeout`
                seconds or closes the connection.
        """
        self.timeout = timeout
        self.socket = socket.socket()
        try:
            self.socket.connect((server, port))
            self.socket.setblocking(0)
            self.line_buffer = []
            # Raw bytes of the trailing, not yet CRLF-terminated line.
            self._byte_buffer = b""
            self.last_pong = time.time()
            # recv_line() returns None when nothing arrived in time; keep
            # waiting until a line shows up or _pingtest() gives up.
            first_line = None
            while first_line is None:
                first_line = self.recv_line(send_ping=False)
            self.servername = first_line.split(" ")[0][1:]
        except Exception:
            # Do not leak the socket when the setup fails half-way.
            self.socket.close()
            raise

    def _pingtest(self, send_ping=True):
        """Raise if the server has been silent too long, else optionally ping.

        Raises:
            ExceptionForRestart: if more than `timeout` seconds have passed
                since data was last received.
        """
        if self.last_pong + self.timeout < time.time():
            print("SERVER NOT ANSWERING")
            raise ExceptionForRestart
        if send_ping:
            self.send_line("PING " + self.servername)

    def send_line(self, msg):
        """Send `msg` as one line to the server, logging it to stdout.

        Carriage returns and newlines inside `msg` are replaced by spaces.
        Messages longer than 510 bytes (UTF-8) are logged and dropped.

        Raises:
            ExceptionForRestart: if the socket reports that nothing was sent.
        """
        msg = msg.replace("\r", " ").replace("\n", " ")
        payload = msg.encode("utf-8")
        if len(payload) > 510:
            print("NOT SENT LINE TO SERVER (too long): " + msg)
            return
        print("LINE TO SERVER: "
              + str(datetime.datetime.now()) + ": " + msg)
        payload += b"\r\n"
        # socket.send() reports *bytes* sent, so all bookkeeping is done on
        # the encoded payload rather than on the character string.
        total_sent_len = 0
        while total_sent_len < len(payload):
            sent_len = self.socket.send(payload[total_sent_len:])
            if sent_len == 0:
                print("SOCKET CONNECTION BROKEN")
                raise ExceptionForRestart
            total_sent_len += sent_len

    @staticmethod
    def _decode_line(raw_line):
        """Decode one received line as UTF-8, falling back to latin1."""
        try:
            return raw_line.decode("UTF-8")
        except UnicodeDecodeError:
            return raw_line.decode("latin1")

    def _recv_line_wrapped(self, send_ping=True):
        """Return the next complete line, or None if none arrived in time.

        Waits up to half of `timeout` per attempt for data. When that wait
        expires, runs the ping test and returns None.

        Raises:
            ExceptionForRestart: if the connection was closed by the peer or
                the ping test fails.
        """
        if len(self.line_buffer) > 0:
            return self.line_buffer.pop(0)
        while True:
            ready = select.select([self.socket], [], [], int(self.timeout / 2))
            if not ready[0]:
                self._pingtest(send_ping)
                return None
            self.last_pong = time.time()
            received_bytes = self.socket.recv(1024)
            if len(received_bytes) == 0:
                print("SOCKET CONNECTION BROKEN")
                raise ExceptionForRestart
            # Split on the raw bytes and decode only complete lines, so that
            # a multi-byte character cut in two by the 1024-byte read is not
            # mis-decoded.
            self._byte_buffer += received_bytes
            raw_lines = self._byte_buffer.split(b"\r\n")
            self._byte_buffer = raw_lines.pop()
            self.line_buffer += [self._decode_line(raw) for raw in raw_lines]
            if len(self.line_buffer) > 0:
                return self.line_buffer.pop(0)

    def recv_line(self, send_ping=True):
        """Return the next line from the server (or None), logging it."""
        line = self._recv_line_wrapped(send_ping)
        if line:
            print("LINE FROM SERVER " + str(datetime.datetime.now()) + ": " +
                  line)
        return line
109
110
def init_session(server, port, timeout, nickname, username, channel):
    """Open a connection to `server` and register, then join `channel`.

    Sends NICK, USER and JOIN in that order and returns the IO object
    representing the connection.
    """
    print("CONNECTING TO " + server)
    connection = IO(server, port, timeout)
    # (command prefix, argument, suffix) for each line of the handshake.
    handshake = (
        ("NICK ", nickname, ""),
        ("USER ", username, " 0 * : "),
        ("JOIN ", channel, ""),
    )
    for prefix, argument, suffix in handshake:
        connection.send_line(prefix + argument + suffix)
    return connection
118
119
def lineparser_loop(io, nickname):
    """Read lines from `io` forever and react to PING and PRIVMSG lines.

    PING lines are answered with PONG. For every http(s) URL found in a
    PRIVMSG, the page is fetched and its title (or an error description) is
    sent as a NOTICE to the channel, or to the sender if the message was
    addressed directly to `nickname`.

    Only returns by way of an exception raised from `io`.
    """

    def act_on_privmsg(tokens):
        """Handle one PRIVMSG line given as its space-separated tokens."""

        def url_check(msg):
            """Announce the page title of every URL contained in `msg`."""

            def notice(msg):
                io.send_line("NOTICE " + target + " :" + msg)

            for url in re.findall(r"(https?://[^\s>]+)", msg):
                # The URL comes from arbitrary chat users, so every failure
                # mode of building and opening the request must be caught:
                # OSError covers URLError/HTTPError and socket timeouts,
                # ValueError covers UnicodeError, HTTPException covers
                # BadStatusLine and InvalidURL (e.g. a non-numeric port).
                try:
                    request = urllib.request.Request(url, headers={
                        "User-Agent": "plomlombot"
                    })
                    webpage = urllib.request.urlopen(request, timeout=15)
                except (OSError, ValueError,
                        http.client.HTTPException) as error:
                    notice("TROUBLE FOLLOWING URL: " + str(error))
                    continue
                # The with-block makes sure the response is closed on every
                # path, including the early `continue`s.
                with webpage:
                    charset = webpage.info().get_content_charset()
                    if not charset:
                        notice("TROUBLE READING PAGE TITLE: "
                               "no charset in header")
                        continue
                    content_type = webpage.info().get_content_type()
                    if content_type not in ('text/html', 'text/xml',
                                            'application/xhtml+xml'):
                        notice("TROUBLE READING PAGE TITLE: bad content type "
                               + content_type)
                        continue
                    # Reading may time out or be cut short; the declared
                    # charset may be unknown (LookupError) or wrong
                    # (UnicodeDecodeError, a ValueError).
                    try:
                        content = webpage.read().decode(charset)
                    except (OSError, http.client.HTTPException,
                            ValueError, LookupError) as error:
                        notice("TROUBLE READING PAGE TITLE: " + str(error))
                        continue
                title = HTMLParser(content, "title").data
                title = html.unescape(title)
                notice("PAGE TITLE FOR URL: " + title)

        def name_of(token):
            """Return the part of `token` before any "!", without colons."""
            return token.partition("!")[0].replace(":", "")

        if len(tokens) < 3:
            # Malformed PRIVMSG without a receiver; nothing to act on.
            return
        sender = name_of(tokens[0])
        receiver = name_of(tokens[2])
        # Reply in the channel, unless the message was sent to us directly.
        target = sender
        if receiver != nickname:
            target = receiver
        # Re-join the message text and drop its leading character.
        msg = " ".join(tokens[3:])[1:]
        url_check(msg)

    while True:
        line = io.recv_line()
        if not line:
            continue
        tokens = line.split(" ")
        if len(tokens) > 1:
            if tokens[1] == "PRIVMSG":
                act_on_privmsg(tokens)
            if tokens[0] == "PING":
                io.send_line("PONG " + tokens[1])
181                 act_on_privmsg(tokens)
182             if tokens[0] == "PING":
183                 io.send_line("PONG " + tokens[1])
184
185
def parse_command_line_arguments():
    """Parse the process's command line and return the resulting namespace.

    The namespace has the attributes server, port, timeout, username,
    nickname (each falling back to the module-level default) and CHANNEL.
    Unknown arguments are ignored.
    """
    parser = argparse.ArgumentParser()
    # Short and long flags must be passed as separate strings; a single
    # "-s, --server" string registers one oddly named option and leaves the
    # long form unrecognized.
    parser.add_argument("-s", "--server", action="store", dest="server",
                        default=SERVER,
                        help="server or server net to connect to (default: "
                        + SERVER + ")")
    parser.add_argument("-p", "--port", action="store", dest="port", type=int,
                        default=PORT, help="port to connect to (default : "
                        + str(PORT) + ")")
    parser.add_argument("-t", "--timeout", action="store", dest="timeout",
                        type=int, default=TIMEOUT,
                        help="timeout in seconds after which to attempt " +
                        "reconnect (default: " + str(TIMEOUT) + ")")
    parser.add_argument("-u", "--username", action="store", dest="username",
                        default=USERNAME, help="username to use (default: "
                        + USERNAME + ")")
    parser.add_argument("-n", "--nickname", action="store", dest="nickname",
                        default=NICKNAME, help="nickname to use (default: "
                        + NICKNAME + ")")
    parser.add_argument("CHANNEL", action="store", help="channel to join")
    opts, _unknown = parser.parse_known_args()
    return opts
208
# Run the bot: connect, process lines, and reconnect whenever the session
# signals that it needs a restart.
opts = parse_command_line_arguments()
while True:
    # Reset on every attempt so that a failure inside init_session() neither
    # hits an unbound name nor touches the previous, already closed session.
    io = None
    try:
        io = init_session(opts.server, opts.port, opts.timeout, opts.nickname,
                          opts.username, opts.CHANNEL)
        lineparser_loop(io, opts.nickname)
    except ExceptionForRestart:
        if io is not None:
            io.socket.close()
215     except ExceptionForRestart:
216         io.socket.close()
217         continue