home · contact · privacy
Cancel page title reading on bad charset.
[plomlombot-irc.git] / plomlombot.py
1 #!/usr/bin/python3
2
3 import argparse
4 import socket
5 import datetime
6 import select
7 import time
8 import re
9 import urllib.request
10 import http.client
11 import html
12
# Defaults, may be overwritten by command line arguments.
# IRC server (or server net) host name to connect to.
SERVER = "irc.freenode.net"
# TCP port to connect to on the server.
PORT = 6667
# Seconds without any server traffic after which a reconnect is attempted.
TIMEOUT = 240
# User name sent in the USER registration command.
USERNAME = "plomlombot"
# Nick name sent in the NICK command; defaults to the user name.
NICKNAME = USERNAME
19
20
class ExceptionForRestart(Exception):
    """Signal that the current connection is unusable and must be rebuilt.

    Raised by the I/O layer when the socket breaks or the server stops
    answering; the top-level loop catches it and reconnects.
    """
23
24
class IO:
    """Line-oriented connection to an IRC server over a non-blocking socket.

    Incoming bytes are decoded and split on "\\r\\n" into complete lines;
    outgoing lines are terminated with "\\r\\n".  The time of the last received
    data is tracked so that a silent server can be detected and reported by
    raising ExceptionForRestart.
    """

    def __init__(self, server, port, timeout):
        """Connect to server:port and learn the server's name.

        The server name is taken from the first token of the first non-empty
        line the server sends, minus its leading character (the ":" prefix).

        server -- host name to connect to.
        port -- TCP port to connect to.
        timeout -- seconds of silence after which the server counts as dead.

        Raises ExceptionForRestart if the connection breaks or stays silent
        for longer than timeout before the first line arrives.  The socket is
        closed again whenever construction fails.
        """
        self.timeout = timeout
        self.line_buffer = []
        self.rune_buffer = ""
        self.socket = socket.socket()
        try:
            self.socket.connect((server, port))
            self.socket.setblocking(0)
            self.last_pong = time.time()
            # recv_line() returns None when select() times out; keep waiting
            # until a real line shows up.  _pingtest() ends the wait by
            # raising ExceptionForRestart once the timeout is exceeded.
            first_line = None
            while not first_line:
                first_line = self.recv_line(send_ping=False)
        except Exception:
            # Nobody else holds a reference to the socket yet; do not leak it.
            self.socket.close()
            raise
        self.servername = first_line.split(" ")[0][1:]

    def _pingtest(self, send_ping=True):
        """Raise ExceptionForRestart on server silence, else optionally PING.

        send_ping -- if true and the timeout is not yet exceeded, send a PING
            to the server so that it produces traffic.
        """
        if self.last_pong + self.timeout < time.time():
            print("SERVER NOT ANSWERING")
            raise ExceptionForRestart
        if send_ping:
            self.send_line("PING " + self.servername)

    def send_line(self, msg):
        """Send msg to the server as one line.

        Carriage returns and newlines inside msg are replaced by spaces so
        that msg cannot smuggle in additional lines.  A message longer than
        510 bytes in UTF-8 is reported on stdout and NOT sent.

        Raises ExceptionForRestart if the connection turns out to be broken.
        """
        msg = msg.replace("\r", " ")
        msg = msg.replace("\n", " ")
        # Encode once: socket.send() counts bytes, not characters, so all
        # length bookkeeping below has to happen on the encoded form.
        payload = msg.encode("utf-8")
        if len(payload) > 510:
            print("NOT SENT LINE TO SERVER (too long): " + msg)
            return
        print("LINE TO SERVER: "
              + str(datetime.datetime.now()) + ": " + msg)
        payload += b"\r\n"
        total_sent_len = 0
        while total_sent_len < len(payload):
            try:
                sent_len = self.socket.send(payload[total_sent_len:])
            except ConnectionError:
                sent_len = 0
            if sent_len == 0:
                print("SOCKET CONNECTION BROKEN")
                raise ExceptionForRestart
            total_sent_len += sent_len

    def _recv_line_wrapped(self, send_ping=True):
        """Return the next complete line, or None if nothing arrived in time.

        Already buffered lines are served first.  Otherwise waits up to half
        the timeout for data; on silence runs _pingtest(send_ping) and
        returns None.

        Raises ExceptionForRestart if the connection is broken or the server
        has been silent for longer than the timeout.
        """
        if self.line_buffer:
            return self.line_buffer.pop(0)
        while True:
            ready = select.select([self.socket], [], [], int(self.timeout / 2))
            if not ready[0]:
                self._pingtest(send_ping)
                return None
            # Any incoming data proves that the server is still alive.
            self.last_pong = time.time()
            try:
                received_bytes = self.socket.recv(1024)
            except ConnectionError:
                received_bytes = b""
            if not received_bytes:
                # A readable socket that yields no bytes was closed remotely.
                print("SOCKET CONNECTION BROKEN")
                raise ExceptionForRestart
            # NOTE: decoding happens per received chunk; a chunk that is not
            # valid UTF-8 on its own (including one that ends in the middle of
            # a multi-byte character) is decoded as latin1 instead.
            try:
                received_runes = received_bytes.decode("UTF-8")
            except UnicodeDecodeError:
                received_runes = received_bytes.decode("latin1")
            self.rune_buffer += received_runes
            lines_split = self.rune_buffer.split("\r\n")
            # Everything before the last "\r\n" is complete; the remainder is
            # the start of a line still in transit.
            self.line_buffer += lines_split[:-1]
            self.rune_buffer = lines_split[-1]
            if self.line_buffer:
                return self.line_buffer.pop(0)

    def recv_line(self, send_ping=True):
        """Return the next line from the server (or None), logging it.

        See _recv_line_wrapped() for the meaning of send_ping and for the
        exceptions raised.
        """
        line = self._recv_line_wrapped(send_ping)
        if line:
            print("LINE FROM SERVER " + str(datetime.datetime.now()) + ": " +
                  line)
        return line
91
92
def init_session(server, port, timeout, nickname, username, channel):
    """Connect to the IRC server, register and join channel.

    server, port, timeout -- passed on to IO().
    nickname -- nick announced with NICK.
    username -- user name announced with USER.
    channel -- channel to JOIN.

    Returns the connected IO object.  If the handshake fails, the freshly
    opened socket is closed before the exception is passed on, since the
    caller never receives the IO object and so cannot close it itself.
    """
    print("CONNECTING TO " + server)
    io = IO(server, port, timeout)
    try:
        io.send_line("NICK " + nickname)
        io.send_line("USER " + username + " 0 * : ")
        io.send_line("JOIN " + channel)
    except Exception:
        io.socket.close()
        raise
    return io
100
101
def lineparser_loop(io, nickname):
    """Read lines from io forever; answer PINGs and announce page titles.

    io -- connection object providing recv_line() and send_line(msg).
    nickname -- the bot's own nick.  A PRIVMSG addressed to it is answered to
        the sender, any other PRIVMSG to its receiver (e.g. the channel).

    Never returns normally; exceptions raised by io propagate to the caller.
    """

    url_pattern = re.compile(r"(https?://[^\s>]+)")
    title_pattern = re.compile(r"<title(?:\s[^>]*)?>(.*?)</title>",
                               re.IGNORECASE | re.DOTALL)

    def act_on_privmsg(tokens):
        """Handle one PRIVMSG line, given as its space-separated tokens."""

        def nick_of(token):
            # The nick is whatever precedes the first "!"; colons are dropped.
            return token.split("!", 1)[0].replace(":", "")

        def notice(msg):
            io.send_line("NOTICE " + target + " :" + msg)

        def title_report(url):
            """Fetch url and return the text to announce about it."""
            try:
                request = urllib.request.Request(url, headers={
                    "User-Agent": "plomlombot"
                })
                webpage = urllib.request.urlopen(request, timeout=15)
            # OSError covers HTTPError, URLError and socket timeouts,
            # ValueError covers UnicodeError and malformed URLs, and
            # HTTPException covers BadStatusLine and InvalidURL.  URLs come
            # from arbitrary chat users and must never take the bot down.
            except (OSError, ValueError, http.client.HTTPException) as error:
                return "TROUBLE FOLLOWING URL: " + str(error)
            try:
                charset = webpage.info().get_content_charset()
                if not charset:
                    return "TROUBLE READING PAGE TITLE: no charset in header"
                content_type = webpage.info().get_content_type()
                if content_type not in ('text/html', 'text/xml',
                                        'application/xhtml+xml'):
                    return ("TROUBLE READING PAGE TITLE: bad content type "
                            + content_type)
                try:
                    content = webpage.read().decode(charset)
                # LookupError: charset name unknown to Python; UnicodeError:
                # the body does not match the announced charset.
                except (OSError, http.client.HTTPException, LookupError,
                        UnicodeError) as error:
                    return "TROUBLE READING PAGE TITLE: " + str(error)
            finally:
                webpage.close()
            found = title_pattern.search(content)
            if found is None:
                return "TROUBLE READING PAGE TITLE: no title found"
            return "PAGE TITLE FOR URL: " + html.unescape(found.group(1))

        def url_check(msg):
            for url in url_pattern.findall(msg):
                notice(title_report(url))

        if len(tokens) < 3:
            # Malformed PRIVMSG without a receiver; nothing to act on.
            return
        sender = nick_of(tokens[0])
        receiver = nick_of(tokens[2])
        # Private messages to the bot are answered to the sender, everything
        # else to where it was sent.
        target = sender if receiver == nickname else receiver
        # Re-join the message text and drop its leading ":".
        msg = " ".join(tokens[3:])[1:]
        url_check(msg)

    while True:
        line = io.recv_line()
        if not line:
            continue
        tokens = line.split(" ")
        if len(tokens) > 1:
            if tokens[1] == "PRIVMSG":
                act_on_privmsg(tokens)
            if tokens[0] == "PING":
                io.send_line("PONG " + tokens[1])
166
167
def parse_command_line_arguments(argv=None):
    """Parse the bot's command line and return the resulting namespace.

    argv -- list of argument strings to parse; None (the default) means the
        process's own command line.

    The returned namespace has the attributes server, port, timeout,
    username, nickname and CHANNEL.  Unrecognized arguments are ignored.
    """
    parser = argparse.ArgumentParser()
    # Short and long flag have to be passed as separate strings; a single
    # "-s, --server" string registers one option of exactly that name, which
    # leaves the long form unrecognized.
    parser.add_argument("-s", "--server", action="store", dest="server",
                        default=SERVER,
                        help="server or server net to connect to (default: "
                        + SERVER + ")")
    parser.add_argument("-p", "--port", action="store", dest="port", type=int,
                        default=PORT, help="port to connect to (default : "
                        + str(PORT) + ")")
    parser.add_argument("-t", "--timeout", action="store", dest="timeout",
                        type=int, default=TIMEOUT,
                        help="timeout in seconds after which to attempt " +
                        "reconnect (default: " + str(TIMEOUT) + ")")
    parser.add_argument("-u", "--username", action="store", dest="username",
                        default=USERNAME, help="username to use (default: "
                        + USERNAME + ")")
    parser.add_argument("-n", "--nickname", action="store", dest="nickname",
                        default=NICKNAME, help="nickname to use (default: "
                        + NICKNAME + ")")
    parser.add_argument("CHANNEL", action="store", help="channel to join")
    opts, unknown = parser.parse_known_args(argv)
    return opts
190
opts = parse_command_line_arguments()
# Run the bot forever: whenever the session signals that it needs a restart,
# drop the old connection and build a new one.
while True:
    # Reset per attempt so that a failure inside init_session() neither hits
    # an unbound name nor closes the previous attempt's socket again.
    io = None
    try:
        io = init_session(opts.server, opts.port, opts.timeout, opts.nickname,
                          opts.username, opts.CHANNEL)
        lineparser_loop(io, opts.nickname)
    except ExceptionForRestart:
        if io is not None:
            io.socket.close()
        continue
197     except ExceptionForRestart:
198         io.socket.close()
199         continue