home · contact · privacy
Use dedicated DB dir for quotes and markov log files.
[plomlombot-irc.git] / plomlombot.py
1 #!/usr/bin/python3
2
3 import argparse
4 import socket
5 import datetime
6 import select
7 import time
8 import re
9 import requests
10 import bs4
11 import random
12 import hashlib
13 import os
14 import plomsearch
15
# Built-in defaults. Each of these can be replaced at start-up through the
# matching command line argument.
SERVER = "irc.freenode.net"
PORT = 6667
TIMEOUT = 240  # seconds
USERNAME = "plomlombot"
NICKNAME = USERNAME  # nickname defaults to the username
TWTFILE = ""
DBDIR = os.path.expanduser("~/plomlombot_db")
24
25
class ExceptionForRestart(Exception):
    """Raised when the server connection is lost and must be set up anew."""
28
29
class IO:
    """Line-based connection to an IRC server over a non-blocking socket.

    Outgoing lines are written with send_line(), incoming ones are read with
    recv_line(). ExceptionForRestart is raised when the socket breaks or when
    nothing has been received for more than `timeout` seconds.
    """

    def __init__(self, server, port, timeout):
        """Connect to server:port and remember the server's name.

        The server name is taken from the first token of the first line the
        server sends, minus that token's leading character. If that first
        line cannot be read, the socket is closed again and
        ExceptionForRestart is propagated.
        """
        self.timeout = timeout
        self.socket = socket.socket()
        self.socket.connect((server, port))
        self.socket.setblocking(0)
        self.line_buffer = []
        self.rune_buffer = ""
        self.last_pong = time.time()
        try:
            # recv_line() returns None whenever a wait times out without
            # data; keep waiting until a line arrives or _pingtest() gives up.
            first_line = None
            while first_line is None:
                first_line = self.recv_line(send_ping=False)
        except ExceptionForRestart:
            # Don't leak the socket of a connection that never got started.
            self.socket.close()
            raise
        self.servername = first_line.split(" ")[0][1:]

    def _pingtest(self, send_ping=True):
        """Check that the server is still alive.

        Raises ExceptionForRestart if nothing was received during the last
        self.timeout seconds; otherwise sends a PING if send_ping is set.
        """
        if self.last_pong + self.timeout < time.time():
            print("SERVER NOT ANSWERING")
            raise ExceptionForRestart
        if send_ping:
            self.send_line("PING " + self.servername)

    def send_line(self, msg):
        """Send msg as one line to the server.

        Line breaks inside msg are replaced by spaces. Lines whose UTF-8
        encoding exceeds 510 bytes (512 with the terminating CR LF) are
        logged and dropped instead of sent. Raises ExceptionForRestart if
        the socket accepts no more data.
        """
        msg = msg.replace("\r", " ").replace("\n", " ")
        payload = (msg + "\r\n").encode("utf-8")
        if len(payload) > 512:
            print("NOT SENT LINE TO SERVER (too long): " + msg)
            return
        print("LINE TO SERVER: "
              + str(datetime.datetime.now()) + ": " + msg)
        # socket.send() reports the number of *bytes* it took, so progress
        # must be tracked on the encoded payload, not on the string.
        total_sent_len = 0
        while total_sent_len < len(payload):
            sent_len = self.socket.send(payload[total_sent_len:])
            if sent_len == 0:
                print("SOCKET CONNECTION BROKEN")
                raise ExceptionForRestart
            total_sent_len += sent_len

    def _recv_line_wrapped(self, send_ping=True):
        """Return the next buffered or freshly received line.

        Returns None if no data arrived within half the timeout (after
        running _pingtest()). Raises ExceptionForRestart if the peer closed
        the connection.
        """
        if self.line_buffer:
            return self.line_buffer.pop(0)
        while True:
            ready = select.select([self.socket], [], [], int(self.timeout / 2))
            if not ready[0]:
                self._pingtest(send_ping)
                return None
            # Any incoming data counts as a sign of life from the server.
            self.last_pong = time.time()
            received_bytes = self.socket.recv(1024)
            if not received_bytes:
                print("SOCKET CONNECTION BROKEN")
                raise ExceptionForRestart
            try:
                received_runes = received_bytes.decode("UTF-8")
            except UnicodeDecodeError:
                received_runes = received_bytes.decode("latin1")
            self.rune_buffer += received_runes
            # Everything before the last CR LF is complete lines; the rest is
            # kept until more data arrives.
            lines_split = self.rune_buffer.split("\r\n")
            self.line_buffer += lines_split[:-1]
            self.rune_buffer = lines_split[-1]
            if self.line_buffer:
                return self.line_buffer.pop(0)

    def recv_line(self, send_ping=True):
        """Return the next line from the server (or None), logging it."""
        line = self._recv_line_wrapped(send_ping)
        if line:
            print("LINE FROM SERVER " + str(datetime.datetime.now()) + ": " +
                  line)
        return line
96
97
def handle_command(command, argument, notice, target, session):
    """Run the bot command `command` and answer through `notice`.

    command -- command word without the leading "!"; one of "addquote",
               "quote", "markov", "twt". Anything else is ignored.
    argument -- the rest of the message after the command word (may be "").
    notice -- callable taking one string; used for every reply.
    target -- channel or nick the command belongs to; the MD5 hex digest of
              its name selects the quotes and markov files in the DB dir.
    session -- object providing .dbdir, .twtfile and .users_in_chan.
    """
    hash_string = hashlib.md5(target.encode("utf-8")).hexdigest()
    quotesfile_name = session.dbdir + "/quotes_" + hash_string

    def addquote():
        # Append argument as new quote, creating the quotes file (with a
        # header line) and the DB directory if they don't exist yet.
        os.makedirs(session.dbdir, exist_ok=True)
        if not os.access(quotesfile_name, os.F_OK):
            with open(quotesfile_name, "w") as quotesfile:
                quotesfile.write("QUOTES FOR " + target + ":\n")
        with open(quotesfile_name, "a") as quotesfile:
            quotesfile.write(argument + "\n")
        with open(quotesfile_name, "r") as quotesfile:
            lines = quotesfile.readlines()
        # The header line is not a quote, hence the - 1.
        notice("ADDED QUOTE #" + str(len(lines) - 1))

    def quote():
        # Show a random quote, the quote of a given index, or all quotes
        # matching a search query.

        def show_help():
            notice("SYNTAX: !quote [int] OR !quote search QUERY")
            notice("QUERY may be a boolean grouping of quoted or unquoted " +
                   "search terms, examples:")
            notice("!quote search foo")
            notice("!quote search foo AND (bar OR NOT baz)")
            notice("!quote search \"foo\\\"bar\" AND ('NOT\"' AND \"'foo'\"" +
                   " OR 'bar\\'baz')")

        tokens = argument.split(" ") if argument else []
        # isdecimal() rather than isdigit(): the latter also accepts
        # characters like superscript digits, which int() rejects.
        if (len(tokens) > 1 and tokens[0] != "search") or \
            (len(tokens) == 1 and
                (tokens[0] == "search" or not tokens[0].isdecimal())):
            show_help()
            return
        if not os.access(quotesfile_name, os.F_OK):
            notice("NO QUOTES AVAILABLE")
            return
        with open(quotesfile_name, "r") as quotesfile:
            lines = quotesfile.readlines()
        lines = lines[1:]  # drop header line
        if len(tokens) == 1:
            i = int(tokens[0])
            if i == 0 or i > len(lines):
                notice("THERE'S NO QUOTE OF THAT INDEX")
                return
            i = i - 1
        elif len(tokens) > 1:
            query = " ".join(tokens[1:])
            try:
                results = plomsearch.search(query, lines)
            except plomsearch.LogicParserError as err:
                notice("FAILED QUERY PARSING: " + str(err))
                return
            if len(results) == 0:
                notice("NO QUOTES MATCHING QUERY")
            else:
                for result in results:
                    notice("QUOTE #" + str(result[0] + 1) + " : " + result[1])
            return
        else:
            if not lines:
                # A file holding nothing but the header has no quote to pick.
                notice("NO QUOTES AVAILABLE")
                return
            i = random.randrange(len(lines))
        notice("QUOTE #" + str(i + 1) + ": " + lines[i])

    def markov():
        # Build a message of up to about 200 characters by chaining tokens
        # from the target's markov feed file.
        from random import choice, shuffle
        select_length = 2
        selections = []

        def pick_next(snippet):
            # Return the token following snippet in a random selection whose
            # leading tokens match as long a tail of snippet as possible; if
            # none matches at all, fall back to any selection.
            usable_selections = []
            for depth in range(select_length, 0, -1):
                usable_selections = [
                    selection for selection in selections
                    if all(snippet[-j] == selection[-(j + 1)]
                           for j in range(1, depth + 1))]
                if usable_selections:
                    break
            if not usable_selections:
                usable_selections = selections
            return choice(usable_selections)[select_length]

        markovfeed_name = session.dbdir + "/markovfeed_" + hash_string
        if not os.access(markovfeed_name, os.F_OK):
            notice("NOT ENOUGH TEXT TO MARKOV.")
            return

        # Lowercase incoming lines, ensure they end in a sentence end mark.
        with open(markovfeed_name, "r") as feed:
            lines = feed.readlines()
        tokens = []
        sentence_end_markers = ".!?)("
        for line in lines:
            line = line.lower().replace("\n", "")
            if not line:
                # Nothing to tokenize (and no last character to inspect).
                continue
            if line[-1] not in sentence_end_markers:
                line += "."
            tokens += line.split()
        if len(tokens) <= select_length:
            notice("NOT ENOUGH TEXT TO MARKOV.")
            return

        # Replace URLs with escape string for now, so that the Markov selector
        # won't see them as different strings. Stash replaced URLs in urls.
        urls = []
        url_escape = "\nURL"
        url_starts = ["http://", "https://", "<http://", "<https://"]
        for i, token in enumerate(tokens):
            for url_start in url_starts:
                if token.startswith(url_start):
                    length = len(token)
                    if url_start[0] == "<":
                        # A "<"-wrapped URL ends at the closing ">", if any.
                        try:
                            length = token.index(">") + 1
                        except ValueError:
                            pass
                    urls.append(token[:length])
                    tokens[i] = url_escape + token[length:]
                    break

        # Collect every run of select_length + 1 consecutive tokens as a
        # selection: select_length tokens of context plus their continuation.
        for i in range(len(tokens) - select_length):
            selections.append(tokens[i:i + select_length + 1])

        # Start snippet with the beginning of a sentence, if possible, i.e.
        # with the tokens following one that ends in a sentence end marker.
        snippet = [""] * select_length
        shuffle(selections)
        for selection in selections:
            if selection[0][-1] in sentence_end_markers:
                snippet = selection[1:select_length + 1]
                break

        # Extend message token by token via pick_next(). Replace present
        # users' names with malkovich.
        msg = ""
        malkovich = "malkovich"
        while True:
            new_end = pick_next(snippet)
            for name in session.users_in_chan:
                # An empty name would "match" each and every token.
                if name and new_end[:len(name)] == name.lower():
                    new_end = malkovich + new_end[len(name):]
                    break
            if len(msg) + len(new_end) > 200:
                break
            msg += new_end + " "
            snippet = snippet[1:] + [new_end]

        # Replace occurences of url escape string with random choice from urls.
        while url_escape in msg:
            msg = msg.replace(url_escape, choice(urls), 1)

        # More meaningful ways to randomly end sentences.
        notice(msg + malkovich + ".")

    def twt():
        # Append argument, prefixed by a UTC timestamp, to the twtxt file.
        from datetime import datetime, timezone
        try:
            twtfile = open(session.twtfile, "a")
        except OSError as err:
            notice("CAN'T ACCESS OR CREATE TWT FILE: " + str(err))
            return
        with twtfile:
            # Naive UTC timestamp, i.e. ISO format without an offset suffix.
            now = datetime.now(timezone.utc).replace(tzinfo=None)
            twtfile.write(now.isoformat() + "\t" + argument + "\n")
        notice("WROTE TWT.")

    if "addquote" == command:
        addquote()
    elif "quote" == command:
        quote()
    elif "markov" == command:
        markov()
    elif "twt" == command:
        twt()
300
301
def handle_url(url, notice, show_url=False):
    """Fetch url and report the title of the page found there.

    url -- address to retrieve
    notice -- callable taking one string; used for every reply
    show_url -- if set, name the URL in the reply next to the title

    Failures to retrieve the URL are reported through notice rather than
    raised. Links to mobile.twitter.com statuses are, after having been
    fetched, looked up again at twitter.com, and that result is reported.
    """

    def mobile_twitter_hack(url):
        # If url is a mobile.twitter.com status link, report on the
        # equivalent twitter.com URL instead and return True.
        match = re.search(
            r"https?://mobile\.twitter\.com/([^/]+)/status/([^?/]+)", url)
        if not match:
            return False
        url = ("https://twitter.com/" + match.group(1) + "/status/"
               + match.group(2))
        handle_url(url, notice, True)
        return True

    try:
        r = requests.get(url, timeout=15)
    except (requests.exceptions.RequestException, UnicodeError) as error:
        # RequestException is the base class of all of requests' own errors,
        # which includes read timeouts.
        notice("TROUBLE FOLLOWING URL: " + str(error))
        return
    if mobile_twitter_hack(url):
        return
    title = bs4.BeautifulSoup(r.text, "html5lib").title
    if title and title.string:
        prefix = "PAGE TITLE: "
        if show_url:
            prefix = "PAGE TITLE FOR <" + url + ">: "
        notice(prefix + title.string.strip())
    else:
        notice("PAGE HAS NO TITLE TAG")
334
335
class Session:
    """One bot session in one channel on an established IO connection."""

    def __init__(self, io, username, nickname, channel, twtfile, dbdir):
        """Store the settings, then register with the server and join channel.

        io -- connection object providing send_line() and recv_line()
        username, nickname -- identity to register with (USER / NICK)
        channel -- channel to join
        twtfile -- path of the twtxt file written to by the twt command
        dbdir -- directory holding the quotes and markov feed files
        """
        self.io = io
        self.nickname = nickname
        self.channel = channel
        self.users_in_chan = []
        self.twtfile = twtfile
        self.dbdir = dbdir
        self.io.send_line("NICK " + self.nickname)
        self.io.send_line("USER " + username + " 0 * : ")
        self.io.send_line("JOIN " + self.channel)

    def loop(self):
        """Read lines from the server forever and react to them.

        Answers PING, keeps self.users_in_chan up to date from 353 (names
        list), JOIN and PART lines, and passes PRIVMSG content on to URL and
        command handling. Does not return; it ends only by an exception.
        """

        def handle_privmsg(tokens):
            # Work out whom to answer and hand the message text over.

            def handle_input(msg, target):
                # Look up URLs in msg, then either run msg as a "!" command
                # or append it to target's markov feed file.

                def notice(text):
                    self.io.send_line("NOTICE " + target + " :" + text)

                if not msg:
                    return
                for url in re.findall(r"(https?://[^\s>]+)", msg):
                    handle_url(url, notice)
                if "!" == msg[0]:
                    command_tokens = msg[1:].split()
                    if command_tokens:  # a lone "!" names no command
                        argument = " ".join(command_tokens[1:])
                        handle_command(command_tokens[0], argument, notice,
                                       target, self)
                    return
                hash_string = hashlib.md5(target.encode("utf-8")).hexdigest()
                markovfeed_name = self.dbdir + "/markovfeed_" + hash_string
                os.makedirs(self.dbdir, exist_ok=True)
                with open(markovfeed_name, "a") as feed:
                    feed.write(msg + "\n")

            if len(tokens) < 3:
                return  # no receiver given; nothing sensible to do
            # Keep what precedes any "!", with ":" characters dropped.
            sender = tokens[0].split("!", 1)[0].replace(":", "")
            receiver = tokens[2].split("!", 1)[0].replace(":", "")
            # Messages sent to the bot directly are answered to their sender,
            # all others to where they were sent (i.e. the channel).
            target = sender
            if receiver != self.nickname:
                target = receiver
            msg = " ".join(tokens[3:])[1:]
            handle_input(msg, target)

        def name_from_join_or_part(tokens):
            # Nick is what precedes the first "!" or "@" of the prefix token,
            # or all of it (minus the leading character) if it has neither.
            token = tokens[0][1:]
            return token.split("!", 1)[0].split("@", 1)[0]

        while True:
            line = self.io.recv_line()
            if not line:
                continue
            tokens = line.split(" ")
            if len(tokens) < 2:
                continue
            if tokens[0] == "PING":
                self.io.send_line("PONG " + tokens[1])
            elif tokens[1] == "PRIVMSG":
                handle_privmsg(tokens)
            elif tokens[1] == "353":
                names = tokens[5:]
                if names and names[0].startswith(":"):
                    names[0] = names[0][1:]
                for name in names:
                    # Strip channel status prefixes so that names compare
                    # equal to those seen in JOIN and PART lines; skip the
                    # empty strings that trailing spaces leave behind.
                    name = name.lstrip("~&@%+")
                    if name and name not in self.users_in_chan:
                        self.users_in_chan.append(name)
            elif tokens[1] == "JOIN":
                name = name_from_join_or_part(tokens)
                if name != self.nickname and name not in self.users_in_chan:
                    self.users_in_chan.append(name)
            elif tokens[1] == "PART":
                name = name_from_join_or_part(tokens)
                if name in self.users_in_chan:
                    self.users_in_chan.remove(name)
419
def parse_command_line_arguments():
    """Parse the command line and return the resulting options namespace.

    The namespace has the attributes server, port, timeout, username,
    nickname, twtfile, dbdir (each falling back to the module's default)
    and CHANNEL (required positional). Unknown arguments are ignored.
    """
    parser = argparse.ArgumentParser()
    # Short and long form must be passed as separate option strings; given as
    # a single "-s, --server" string, the long form is never recognized.
    parser.add_argument("-s", "--server", action="store", dest="server",
                        default=SERVER,
                        help="server or server net to connect to (default: "
                        + SERVER + ")")
    parser.add_argument("-p", "--port", action="store", dest="port", type=int,
                        default=PORT, help="port to connect to (default : "
                        + str(PORT) + ")")
    parser.add_argument("-w", "--wait", action="store", dest="timeout",
                        type=int, default=TIMEOUT,
                        help="timeout in seconds after which to attempt " +
                        "reconnect (default: " + str(TIMEOUT) + ")")
    parser.add_argument("-u", "--username", action="store", dest="username",
                        default=USERNAME, help="username to use (default: "
                        + USERNAME + ")")
    parser.add_argument("-n", "--nickname", action="store", dest="nickname",
                        default=NICKNAME, help="nickname to use (default: "
                        + NICKNAME + ")")
    parser.add_argument("-t", "--twtxtfile", action="store", dest="twtfile",
                        default=TWTFILE, help="twtxt file to use (default: "
                        + TWTFILE + ")")
    parser.add_argument("-d", "--dbdir", action="store", dest="dbdir",
                        default=DBDIR, help="directory to store DB files in")
    parser.add_argument("CHANNEL", action="store", help="channel to join")
    opts, _unknown = parser.parse_known_args()
    return opts
447
448
# Connect and run a session; whenever the connection is lost, clean up and
# start over with a fresh connection.
opts = parse_command_line_arguments()
while True:
    # Reset per attempt: if IO() itself fails, there is no (new) connection
    # object whose socket could be closed below.
    io = None
    try:
        io = IO(opts.server, opts.port, opts.timeout)
        session = Session(io, opts.username, opts.nickname, opts.CHANNEL,
                          opts.twtfile, opts.dbdir)
        session.loop()
    except ExceptionForRestart:
        if io is not None:
            io.socket.close()
        continue