home · contact · privacy
Handle UnicodeError in URL parsing.
[plomlombot-irc.git] / plomlombot.py
1 #!/usr/bin/python3
2
3 import argparse
4 import socket
5 import datetime
6 import select
7 import time
8 import re
9 import urllib.request
10 import html
11
# Built-in connection settings; each one can be replaced by the matching
# command line option.
SERVER = "irc.freenode.net"  # host to connect to
PORT = 6667  # TCP port on that host
TIMEOUT = 240  # seconds of server silence before a reconnect is attempted
USERNAME = "plomlombot"  # name sent in the USER command
NICKNAME = USERNAME  # nickname defaults to the username
18
class ExceptionForRestart(Exception):
    """Signal that the server connection is unusable and must be rebuilt."""
21
class IO:
    """Line-oriented connection to an IRC server over a non-blocking socket.

    The constructor connects and takes the server's name from the first line
    the server sends. send_line() and recv_line() exchange single
    CRLF-terminated messages. ExceptionForRestart is raised whenever the
    connection appears to be dead.
    """

    def __init__(self, server, port, timeout):
        """Connect to server:port and wait for the server's first line.

        timeout is the number of seconds without any received data after
        which the connection is considered dead.

        Raises ExceptionForRestart if the server stays silent for longer than
        timeout or closes the connection; the socket is closed before any
        exception leaves the constructor.
        """
        self.timeout = timeout
        self.socket = socket.socket()
        try:
            self.socket.connect((server, port))
            self.socket.setblocking(0)
            self.line_buffer = []
            # Raw bytes of a line whose terminating CRLF has not arrived yet.
            self._recv_buffer = b""
            self.last_pong = time.time()
            # recv_line() returns None on a quiet poll interval; keep waiting
            # until a line arrives (or _pingtest() gives up and raises).
            first_line = None
            while first_line is None:
                first_line = self.recv_line(send_ping=False)
        except Exception:
            # Do not leak the socket when the handshake cannot be completed.
            self.socket.close()
            raise
        self.servername = first_line.split(" ")[0][1:]

    def _pingtest(self, send_ping=True):
        """Check for a server timeout and optionally send a PING.

        Raises ExceptionForRestart if nothing was received for more than
        self.timeout seconds.
        """
        if self.last_pong + self.timeout < time.time():
            print("SERVER NOT ANSWERING")
            raise ExceptionForRestart
        if send_ping:
            self.send_line("PING " + self.servername)

    def send_line(self, msg):
        """Send msg to the server as one CRLF-terminated line.

        Carriage returns and newlines inside msg are replaced by spaces.
        Messages longer than 510 bytes (UTF-8 encoded, without the CRLF) are
        logged and dropped instead of being sent.

        Raises ExceptionForRestart if the socket accepts no more data.
        """
        msg = msg.replace("\r", " ").replace("\n", " ")
        payload = msg.encode("utf-8")
        if len(payload) > 510:
            print("NOT SENT LINE TO SERVER (too long): " + msg)
            return
        print("LINE TO SERVER: "
            + str(datetime.datetime.now()) + ": " + msg)
        payload += b"\r\n"
        # socket.send() reports bytes, so all bookkeeping is done on the
        # encoded payload rather than on the character string.
        total_sent_len = 0
        while total_sent_len < len(payload):
            sent_len = self.socket.send(payload[total_sent_len:])
            if sent_len == 0:
                print("SOCKET CONNECTION BROKEN")
                raise ExceptionForRestart
            total_sent_len += sent_len

    def _recv_line_wrapped(self, send_ping=True):
        """Return the next complete line from the server, or None.

        None is returned when no data arrived within half the timeout; in
        that case _pingtest(send_ping) has been run. Raises
        ExceptionForRestart if the server closed the connection.
        """
        if self.line_buffer:
            return self.line_buffer.pop(0)
        while True:
            ready = select.select([self.socket], [], [], int(self.timeout / 2))
            if not ready[0]:
                self._pingtest(send_ping)
                return None
            self.last_pong = time.time()
            received = self.socket.recv(1024)
            if not received:
                print("SOCKET CONNECTION BROKEN")
                raise ExceptionForRestart
            self._recv_buffer += received
            raw_lines = self._recv_buffer.split(b"\r\n")
            self._recv_buffer = raw_lines.pop()
            # Decode only complete lines: a multi-byte character split over
            # two recv() calls is whole again by then, and invalid bytes are
            # replaced instead of raising.
            self.line_buffer += [raw.decode("utf-8", errors="replace")
                                 for raw in raw_lines]
            if self.line_buffer:
                return self.line_buffer.pop(0)

    def recv_line(self, send_ping=True):
        """Return the next line from the server (or None) and log it."""
        line = self._recv_line_wrapped(send_ping)
        if line:
            print("LINE FROM SERVER " + str(datetime.datetime.now()) + ": " +
            line)
        return line
84
def init_session(server, port, timeout, nickname, username, channel):
    """Open a connection, register with the server and join channel.

    Returns the IO object for the new connection after the NICK, USER and
    JOIN commands have been sent, in that order.
    """
    print("CONNECTING TO " + server)
    connection = IO(server, port, timeout)
    handshake = (
        "NICK " + nickname,
        "USER " + username + " 0 * : ",
        "JOIN " + channel,
    )
    for command in handshake:
        connection.send_line(command)
    return connection
92
def lineparser_loop(io, nickname):
    """Read lines from io forever and react to them.

    PING lines are answered with PONG. For every http(s) URL found in a
    PRIVMSG, the page is fetched and its <title> is sent back to the channel
    the message came from, or to the sender if the message was addressed to
    nickname directly.

    io must provide recv_line() and send_line(msg). The function only ends
    when one of those calls raises.
    """
    # Function-scope import: only needed for the exception types below.
    import http.client

    url_pattern = re.compile(r"(https?://[^\s]+)")
    title_pattern = re.compile(r"<title[^>]*>(.*?)</title>",
                               re.IGNORECASE | re.DOTALL)
    # Everything that fetching and decoding a user-supplied URL can raise:
    # URLError/HTTPError and socket timeouts are OSErrors, UnicodeError is a
    # ValueError, an unknown charset name is a LookupError, and malformed
    # URLs or broken responses raise http.client.HTTPException.
    fetch_errors = (urllib.error.URLError, http.client.HTTPException,
                    OSError, ValueError, LookupError)

    def url_check(msg, target):
        """Send the page title of each URL in msg to target."""
        for url in url_pattern.findall(msg):
            try:
                # The with-block closes the response on every exit path.
                with urllib.request.urlopen(url, timeout=15) as webpage:
                    content_type = webpage.info().get_content_type()
                    if content_type not in ('text/html', 'text/xml',
                                            'application/xhtml+xml'):
                        print("TROUBLE INTERPRETING URL: bad content type "
                                + content_type)
                        continue
                    charset = webpage.info().get_content_charset()
                    if not charset:
                        charset = "utf-8"
                    content = webpage.read().decode(charset, errors="replace")
            except fetch_errors as error:
                print("TROUBLE FOLLOWING URL: " + str(error))
                continue
            found = title_pattern.search(content)
            if found is None:
                # A page without a title must not take the bot down.
                print("TROUBLE INTERPRETING URL: no title found")
                continue
            title = html.unescape(found.group(1))
            io.send_line("PRIVMSG " + target + " :page title for url: "
                + title)

    def act_on_privmsg(tokens):
        """Work out where to reply and scan the message text for URLs."""
        if len(tokens) < 3:
            # Malformed PRIVMSG without a target; nothing to reply to.
            return
        # Nick is everything before the first "!", with ":" removed.
        sender = tokens[0].split("!", 1)[0].replace(":", "")
        receiver = tokens[2].split("!", 1)[0].replace(":", "")
        # Reply in the channel unless the bot was addressed privately.
        target = sender if receiver == nickname else receiver
        msg = " ".join(tokens[3:])[1:]
        url_check(msg, target)

    while True:
        line = io.recv_line()
        if not line:
            continue
        tokens = line.split(" ")
        if len(tokens) < 2:
            continue
        if tokens[1] == "PRIVMSG":
            act_on_privmsg(tokens)
        if tokens[0] == "PING":
            io.send_line("PONG " + tokens[1])
150
def parse_command_line_arguments():
    """Parse the process command line and return the resulting namespace.

    The namespace has the attributes server, port, timeout, username,
    nickname (each falling back to the module-level default) and CHANNEL.
    Unrecognised arguments are ignored.
    """
    parser = argparse.ArgumentParser()
    # Short and long flags must be separate strings; a single
    # "-s, --server" string registers one oddly named option and leaves
    # "--server" unrecognised.
    parser.add_argument("-s", "--server", action="store", dest="server",
            default=SERVER,
            help="server or server net to connect to (default: " + SERVER +
            ")")
    parser.add_argument("-p", "--port", action="store", dest="port", type=int,
            default=PORT, help="port to connect to (default : " + str(PORT) +
            ")")
    parser.add_argument("-t", "--timeout", action="store", dest="timeout",
            type=int, default=TIMEOUT,
            help="timeout in seconds after which to attempt reconnect " +
            "(default: " + str(TIMEOUT) + ")")
    parser.add_argument("-u", "--username", action="store", dest="username",
            default=USERNAME, help="username to use (default: " + USERNAME +
            ")")
    parser.add_argument("-n", "--nickname", action="store", dest="nickname",
            default=NICKNAME, help="nickname to use (default: " + NICKNAME +
            ")")
    parser.add_argument("CHANNEL", action="store", help="channel to join")
    opts, _unknown = parser.parse_known_args()
    return opts
173
# Run the bot: connect, process lines, and reconnect whenever the connection
# is reported as broken.
opts = parse_command_line_arguments()
while True:
    # Reset per attempt so the handler never touches an unbound name or a
    # connection left over from a previous attempt.
    io = None
    try:
        io = init_session(opts.server, opts.port, opts.timeout, opts.nickname,
                opts.username, opts.CHANNEL)
        lineparser_loop(io, opts.nickname)
    except ExceptionForRestart:
        # init_session itself may have failed, in which case there is no
        # connection object to close.
        if io is not None:
            io.socket.close()
182         continue