home · contact · privacy
In Markov text generation, lowercase earlier.
[plomlombot-irc.git] / plomlombot.py
1 #!/usr/bin/python3
2
3 import argparse
4 import socket
5 import datetime
6 import select
7 import time
8 import re
9 import requests
10 import bs4
11 import random
12 import hashlib
13 import os
14 import plomsearch
15
# Pattern for http(s) URLs inside chat text: everything up to the next
# whitespace or ">" character.  Written as a raw string so that "\s" reaches
# the regex engine verbatim instead of being treated as a (invalid) string
# escape sequence.
URLREGEX = r"(https?://[^\s>]+)"

# Defaults, may be overwritten by command line arguments.
SERVER = "irc.freenode.net"
PORT = 6667
TIMEOUT = 240
USERNAME = "plomlombot"
NICKNAME = USERNAME
24
25
class ExceptionForRestart(Exception):
    """Raised when the server connection is broken or unresponsive.

    The top-level loop of this script catches it to close the socket and
    connect anew.
    """
28
29
class IO:
    """Line-based I/O over a non-blocking socket to an IRC server.

    Attributes:
        timeout: seconds without any server data after which the connection
            is considered dead.
        socket: the connected socket object.
        line_buffer: complete received lines not yet handed to the caller.
        rune_buffer: trailing received text not yet terminated by "\\r\\n".
        last_pong: time.time() of the last moment data arrived.
        servername: first token (minus its leading character) of the first
            line the server sent; used as the PING argument.
    """

    def __init__(self, server, port, timeout):
        """Connect to server:port and read the server's first line."""
        self.timeout = timeout
        self.socket = socket.socket()
        self.socket.connect((server, port))
        self.socket.setblocking(0)
        self.line_buffer = []
        self.rune_buffer = ""
        self.last_pong = time.time()
        self.servername = self.recv_line(send_ping=False).split(" ")[0][1:]

    def _pingtest(self, send_ping=True):
        """Raise ExceptionForRestart on server silence, else optionally PING.

        Called when a receive attempt timed out without data.
        """
        if self.last_pong + self.timeout < time.time():
            print("SERVER NOT ANSWERING")
            raise ExceptionForRestart
        if send_ping:
            self.send_line("PING " + self.servername)

    def send_line(self, msg):
        """Send msg as a single "\\r\\n"-terminated line to the server.

        Carriage returns and newlines inside msg are replaced by spaces.
        Lines longer than 510 bytes (UTF-8 encoded) are logged and dropped.

        Raises:
            ExceptionForRestart: if the socket accepts no more data.
        """
        msg = msg.replace("\r", " ").replace("\n", " ")
        if len(msg.encode("utf-8")) > 510:
            print("NOT SENT LINE TO SERVER (too long): " + msg)
            # Actually skip the line, as the log message promises.
            return
        print("LINE TO SERVER: "
              + str(datetime.datetime.now()) + ": " + msg)
        # Encode once and do all bookkeeping in bytes: socket.send() reports
        # the number of *bytes* sent, which differs from the number of
        # characters as soon as msg contains non-ASCII text.
        data = (msg + "\r\n").encode("utf-8")
        total_sent_len = 0
        while total_sent_len < len(data):
            sent_len = self.socket.send(data[total_sent_len:])
            if sent_len == 0:
                print("SOCKET CONNECTION BROKEN")
                raise ExceptionForRestart
            total_sent_len += sent_len

    def _recv_line_wrapped(self, send_ping=True):
        """Return the next complete line, or None if the wait timed out.

        Waits up to half of self.timeout for data; on timeout runs
        _pingtest(send_ping).

        Raises:
            ExceptionForRestart: if the peer closed the connection, or (via
                _pingtest) the server has been silent for too long.
        """
        if len(self.line_buffer) > 0:
            return self.line_buffer.pop(0)
        while True:
            ready = select.select([self.socket], [], [], int(self.timeout / 2))
            if not ready[0]:
                self._pingtest(send_ping)
                return None
            # Any incoming data counts as a sign of life from the server.
            self.last_pong = time.time()
            received_bytes = self.socket.recv(1024)
            try:
                received_runes = received_bytes.decode("UTF-8")
            except UnicodeDecodeError:
                # Fall back to an encoding that can decode any byte sequence.
                received_runes = received_bytes.decode("latin1")
            if len(received_runes) == 0:
                print("SOCKET CONNECTION BROKEN")
                raise ExceptionForRestart
            self.rune_buffer += received_runes
            lines_split = self.rune_buffer.split("\r\n")
            # Everything but the last piece is a complete line; the last
            # piece is the (possibly empty) start of the next one.
            self.line_buffer += lines_split[:-1]
            self.rune_buffer = lines_split[-1]
            if len(self.line_buffer) > 0:
                return self.line_buffer.pop(0)

    def recv_line(self, send_ping=True):
        """Return (and log) the next line from the server, or None/empty."""
        line = self._recv_line_wrapped(send_ping)
        if line:
            print("LINE FROM SERVER " + str(datetime.datetime.now()) + ": " +
                  line)
        return line
96
97
def handle_command(command, argument, notice, target, session):
    """Execute one bot command ("addquote", "quote" or "markov").

    Unknown commands are ignored silently.

    Args:
        command: command name without the leading "!".
        argument: rest of the command line as a single string (may be "").
        notice: callable taking one string; used to send all replies.
        target: channel or nick the reply goes to; its MD5 hash selects the
            per-target "quotes_<hash>" and "markovfeed_<hash>" files in the
            current working directory.
        session: object whose uses_in_chan attribute lists nicknames to be
            kept out of Markov-generated text (only read by "markov").
    """
    hash_string = hashlib.md5(target.encode("utf-8")).hexdigest()
    quotesfile_name = "quotes_" + hash_string

    def addquote():
        """Append argument to the quotes file and report its number."""
        if not os.access(quotesfile_name, os.F_OK):
            with open(quotesfile_name, "w") as quotesfile:
                quotesfile.write("QUOTES FOR " + target + ":\n")
        with open(quotesfile_name, "a") as quotesfile:
            quotesfile.write(argument + "\n")
        with open(quotesfile_name, "r") as quotesfile:
            lines = quotesfile.readlines()
        # The first line of the file is the header, not a quote.
        notice("ADDED QUOTE #" + str(len(lines) - 1))

    def quote():
        """Reply with a random quote, a numbered quote, or search results."""

        def help():
            notice("SYNTAX: !quote [int] OR !quote search QUERY")
            notice("QUERY may be a boolean grouping of quoted or unquoted " +
                   "search terms, examples:")
            notice("!quote search foo")
            notice("!quote search foo AND (bar OR NOT baz)")
            notice("!quote search \"foo\\\"bar\" AND ('NOT\"' AND \"'foo'\"" +
                   " OR 'bar\\'baz')")

        tokens = argument.split(" ") if argument else []
        # isdecimal() rather than isdigit(): the latter also accepts
        # characters such as superscript digits, on which int() raises.
        if (len(tokens) > 1 and tokens[0] != "search") or \
            (len(tokens) == 1 and
                (tokens[0] == "search" or not tokens[0].isdecimal())):
            help()
            return
        if not os.access(quotesfile_name, os.F_OK):
            notice("NO QUOTES AVAILABLE")
            return
        with open(quotesfile_name, "r") as quotesfile:
            lines = quotesfile.readlines()[1:]  # drop the header line
        if len(tokens) > 1:
            query = " ".join(tokens[1:])
            try:
                results = plomsearch.search(query, lines)
            except plomsearch.LogicParserError as err:
                notice("FAILED QUERY PARSING: " + str(err))
                return
            if len(results) == 0:
                notice("NO QUOTES MATCHING QUERY")
            else:
                for result in results:
                    notice("QUOTE #" + str(result[0] + 1) + " : " + result[1])
            return
        if len(tokens) == 1:
            i = int(tokens[0])
            if i == 0 or i > len(lines):
                notice("THERE'S NO QUOTE OF THAT INDEX")
                return
            i = i - 1
        else:
            if not lines:
                # File holds only its header; randrange(0) would raise.
                notice("NO QUOTES AVAILABLE")
                return
            i = random.randrange(len(lines))
        notice("QUOTE #" + str(i + 1) + ": " + lines[i])

    def markov():
        """Reply with text generated randomly from the target's feed file."""
        select_length = 2
        selections = []

        def next_token(snippet):
            """Pick a token to follow snippet.

            Prefers selections whose first select_length tokens equal those
            of snippet, then ones sharing fewer leading tokens, and finally
            falls back to any selection.
            """
            for i in range(select_length, 0, -1):
                usable_selections = [selection for selection in selections
                                     if selection[:i] == snippet[:i]]
                if usable_selections:
                    break
            else:
                usable_selections = selections
            return random.choice(usable_selections)[select_length]

        def purge_undesired(tokens):
            """Return tokens without URLs and without known nicknames."""
            url_pattern = re.compile("^" + URLREGEX)
            names = {name.lower() for name in session.uses_in_chan}
            # Build a new list instead of deleting while iterating, which
            # would skip the element following each deleted one.
            return [token for token in tokens
                    if url_pattern.match(token) is None
                    and token not in names]

        markovfeed_name = "markovfeed_" + hash_string
        if not os.access(markovfeed_name, os.F_OK):
            notice("NOT ENOUGH TEXT TO MARKOV.")
            return
        with open(markovfeed_name, "r") as file:
            lines = file.readlines()
        tokens = []
        for line in lines:
            tokens += line.replace("\n", "").lower().split()
        tokens = purge_undesired(tokens)
        if len(tokens) <= select_length:
            notice("NOT ENOUGH TEXT TO MARKOV.")
            return
        # Every run of select_length + 1 consecutive tokens: the leading
        # ones serve as lookup key, the last one as possible continuation.
        for i in range(len(tokens) - select_length):
            selections.append(tokens[i:i + select_length + 1])
        snippet = [""] * select_length
        msg = ""
        while True:
            new_end = next_token(snippet)
            if len(msg) + len(new_end) > 200:
                break
            msg += new_end + " "
            snippet = snippet[1:] + [new_end]
        notice(msg + "malkovich.")

    if "addquote" == command:
        addquote()
    elif "quote" == command:
        quote()
    elif "markov" == command:
        markov()
241
242
def handle_url(url, notice, show_url=False):
    """Fetch url and report the page's title via notice.

    URLs of the form http(s)://mobile.twitter.com/USER/status/ID are, after
    having been fetched successfully, re-handled as their twitter.com
    equivalent, with the rewritten URL shown in the reply.

    Args:
        url: the URL to retrieve.
        notice: callable taking one string; used to send all replies.
        show_url: if true, mention url itself in the title reply.
    """

    def mobile_twitter_hack(url):
        """Re-handle mobile Twitter status URLs; return whether that applied."""
        # One pattern for detection and extraction, so that a URL with
        # nothing after "/status/" is simply not treated as a match.
        m = re.search(
            r'https?://mobile\.twitter\.com/([^/]+)/status/([^?/]+)', url)
        if m is None:
            return False
        url = 'https://twitter.com/' + m.group(1) + '/status/' + m.group(2)
        handle_url(url, notice, True)
        return True

    try:
        r = requests.get(url, timeout=15)
    except (requests.exceptions.TooManyRedirects,
            requests.exceptions.ConnectionError,
            requests.exceptions.Timeout,
            requests.exceptions.InvalidURL,
            requests.exceptions.InvalidSchema) as error:
        # Timeout is listed because the request is made with a timeout and
        # a slow server must not take the whole bot down.
        notice("TROUBLE FOLLOWING URL: " + str(error))
        return
    if mobile_twitter_hack(url):
        return
    title = bs4.BeautifulSoup(r.text, "html.parser").title
    if title is not None:
        prefix = "PAGE TITLE: "
        if show_url:
            prefix = "PAGE TITLE FOR <" + url + ">: "
        # get_text() instead of .string: the latter is None for an empty
        # title element or one with more than one child node.
        notice(prefix + title.get_text().strip())
    else:
        notice("PAGE HAS NO TITLE TAG")
274
275
class Session:
    """One bot session on one channel over an established connection.

    Attributes:
        io: connection object providing send_line() and recv_line().
        nickname: the bot's own nickname.
        channel: the channel joined at start.
        uses_in_chan: names collected from 353 replies and JOIN messages,
            minus those seen in PART messages.
    """

    def __init__(self, io, username, nickname, channel):
        """Register with the server via NICK and USER, then JOIN channel."""
        self.io = io
        self.nickname = nickname
        self.channel = channel
        self.uses_in_chan = []
        self.io.send_line("NICK " + self.nickname)
        self.io.send_line("USER " + username + " 0 * : ")
        self.io.send_line("JOIN " + self.channel)

    def loop(self):
        """Receive and handle server lines forever.

        Answers PING, tracks channel members, and hands PRIVMSG text to URL
        and command handling. Only ends by way of an exception.
        """

        def handle_privmsg(tokens):
            """Handle one PRIVMSG line, given as its space-split tokens."""

            def handle_input(msg, target):
                """React to message text msg, sending replies to target."""

                def notice(msg):
                    self.io.send_line("NOTICE " + target + " :" + msg)

                for url in re.findall(URLREGEX, msg):
                    handle_url(url, notice)
                # startswith() instead of msg[0]: msg may be empty.
                if msg.startswith("!"):
                    tokens = msg[1:].split()
                    if tokens:  # a lone "!" carries no command
                        argument = " ".join(tokens[1:])
                        handle_command(tokens[0], argument, notice, target,
                                       self)
                    return
                # Everything that is not a command feeds the Markov text.
                hash_string = hashlib.md5(target.encode("utf-8")).hexdigest()
                markovfeed_name = "markovfeed_" + hash_string
                with open(markovfeed_name, "a") as file:
                    file.write(msg + "\n")

            def name_part(token):
                """Return token up to the first "!", with ":" removed."""
                return token.split("!", 1)[0].replace(":", "")

            if len(tokens) < 3:
                # No receiver token; nothing sensible to do with the line.
                return
            sender = name_part(tokens[0])
            receiver = name_part(tokens[2])
            # Messages addressed to the bot itself are answered to their
            # sender, all others to the receiver they were sent to.
            target = sender
            if receiver != self.nickname:
                target = receiver
            msg = " ".join(tokens[3:])[1:]
            handle_input(msg, target)

        def name_from_join_or_part(tokens):
            """Return the name at the start of a JOIN/PART line's prefix.

            Cuts the prefix (minus its first character) at the first "!" or
            "@"; a prefix containing neither is returned whole.
            """
            token = tokens[0][1:]
            return token.split("!", 1)[0].split("@", 1)[0]

        while True:
            line = self.io.recv_line()
            if not line:
                continue
            tokens = line.split(" ")
            if len(tokens) > 1:
                if tokens[0] == "PING":
                    self.io.send_line("PONG " + tokens[1])
                elif tokens[1] == "PRIVMSG":
                    handle_privmsg(tokens)
                elif tokens[1] == "353":
                    names = tokens[5:]
                    if names:
                        # Strip the first character of the first name token.
                        names[0] = names[0][1:]
                        self.uses_in_chan += names
                elif tokens[1] == "JOIN":
                    name = name_from_join_or_part(tokens)
                    if name != self.nickname:
                        self.uses_in_chan += [name]
                elif tokens[1] == "PART":
                    name = name_from_join_or_part(tokens)
                    # The name may be unknown here (e.g. listed by the 353
                    # reply with a prefix character); don't crash on that.
                    if name in self.uses_in_chan:
                        self.uses_in_chan.remove(name)
357
def parse_command_line_arguments(argv=None):
    """Parse the bot's command line and return the resulting namespace.

    Args:
        argv: list of argument strings to parse; None (the default) means
            the process's own command line arguments.

    Returns:
        Namespace with attributes server, port, timeout, username, nickname
        and CHANNEL. Unrecognized arguments are ignored.
    """
    parser = argparse.ArgumentParser()
    # Short and long flag are passed as separate strings; a single
    # "-s, --server" string would register neither "--server" nor a proper
    # "-s" option.
    parser.add_argument("-s", "--server", action="store", dest="server",
                        default=SERVER,
                        help="server or server net to connect to (default: "
                        + SERVER + ")")
    parser.add_argument("-p", "--port", action="store", dest="port", type=int,
                        default=PORT, help="port to connect to (default : "
                        + str(PORT) + ")")
    parser.add_argument("-t", "--timeout", action="store", dest="timeout",
                        type=int, default=TIMEOUT,
                        help="timeout in seconds after which to attempt " +
                        "reconnect (default: " + str(TIMEOUT) + ")")
    parser.add_argument("-u", "--username", action="store", dest="username",
                        default=USERNAME, help="username to use (default: "
                        + USERNAME + ")")
    parser.add_argument("-n", "--nickname", action="store", dest="nickname",
                        default=NICKNAME, help="nickname to use (default: "
                        + NICKNAME + ")")
    parser.add_argument("CHANNEL", action="store", help="channel to join")
    opts, unknown = parser.parse_known_args(argv)
    return opts
380
381
# Connect and run a session; whenever the connection is found broken or
# unresponsive, close the socket and start over with a fresh connection.
opts = parse_command_line_arguments()
while True:
    # Reset per attempt, so that a failure inside IO() itself neither hits
    # an unbound name nor touches the previous attempt's connection.
    io = None
    try:
        io = IO(opts.server, opts.port, opts.timeout)
        session = Session(io, opts.username, opts.nickname, opts.CHANNEL)
        session.loop()
    except ExceptionForRestart:
        if io is not None:
            io.socket.close()
        continue