home · contact · privacy
Add primitive channel logging.
[plomlombot-irc.git] / plomlombot.py
1 #!/usr/bin/python3
2
3 import argparse
4 import socket
5 import datetime
6 import select
7 import time
8 import re
9 import requests
10 import bs4
11 import random
12 import hashlib
13 import os
14 import plomsearch
15
# Built-in defaults; each one can be replaced by a command line argument.

# Connection settings.
SERVER = "irc.freenode.net"
PORT = 6667
TIMEOUT = 240

# Identity used on the IRC server; the nickname defaults to the username.
USERNAME = "plomlombot"
NICKNAME = USERNAME

# Storage locations: twtxt file path (empty by default) and the directory
# for the bot's DB files.
TWTFILE = ""
DBDIR = os.path.expanduser("~/plomlombot_db")
24
25
class ExceptionForRestart(Exception):
    """Raised when the server connection is lost and a reconnect is needed."""
28
29
class IO:
    """Line-oriented connection to an IRC server over a non-blocking socket.

    Outgoing lines are terminated with CRLF; incoming bytes are buffered and
    split into lines on CRLF. Silence from the server for longer than the
    configured timeout raises ExceptionForRestart.
    """

    def __init__(self, server, port, timeout):
        """Connect to server:port and read the server name off the first line.

        timeout is the number of seconds without any data from the server
        after which the connection counts as dead; select() waits for half
        of it before a PING is considered.

        Raises ExceptionForRestart if no first line arrives in time. The
        socket is closed before any exception leaves the constructor.
        """
        self.timeout = timeout
        self.socket = socket.socket()
        try:
            self.socket.connect((server, port))
            self.socket.setblocking(0)
            self.line_buffer = []
            # Raw bytes received after the last complete CRLF-terminated line.
            self.byte_buffer = b""
            self.last_pong = time.time()
            # recv_line() returns None after each quiet select() period, so
            # keep waiting; _pingtest() raises once the timeout has elapsed.
            first_line = None
            while first_line is None:
                first_line = self.recv_line(send_ping=False)
        except BaseException:
            self.socket.close()
            raise
        # First token of the first line, minus its leading ":".
        self.servername = first_line.split(" ")[0][1:]

    def _pingtest(self, send_ping=True):
        """Raise ExceptionForRestart if the server was silent for too long.

        Otherwise, if send_ping is true, send a PING to provoke an answer.
        """
        if self.last_pong + self.timeout < time.time():
            print("SERVER NOT ANSWERING")
            raise ExceptionForRestart
        if send_ping:
            self.send_line("PING " + self.servername)

    def send_line(self, msg):
        """Send msg as one line to the server.

        Carriage returns and newlines inside msg are replaced by spaces.
        Lines longer than 510 bytes of UTF-8 are reported and NOT sent.
        Raises ExceptionForRestart if the socket accepts no more data.
        """
        msg = msg.replace("\r", " ").replace("\n", " ")
        encoded = msg.encode("utf-8")
        if len(encoded) > 510:
            print("NOT SENT LINE TO SERVER (too long): " + msg)
            return
        print("LINE TO SERVER: "
              + str(datetime.datetime.now()) + ": " + msg)
        # Track progress in bytes: socket.send() reports bytes, which differ
        # from character counts as soon as msg contains non-ASCII text.
        payload = encoded + b"\r\n"
        total_sent_len = 0
        while total_sent_len < len(payload):
            sent_len = self.socket.send(payload[total_sent_len:])
            if sent_len == 0:
                print("SOCKET CONNECTION BROKEN")
                raise ExceptionForRestart
            total_sent_len += sent_len

    @staticmethod
    def _decode(raw_line):
        """Decode one received line as UTF-8, falling back to latin1."""
        try:
            return raw_line.decode("UTF-8")
        except UnicodeDecodeError:
            return raw_line.decode("latin1")

    def _recv_line_wrapped(self, send_ping=True):
        """Return the next buffered or received line, or None on a quiet wait.

        Raises ExceptionForRestart when the peer closed the connection or
        (via _pingtest) when the server has been silent for too long.
        """
        if self.line_buffer:
            return self.line_buffer.pop(0)
        while True:
            ready = select.select([self.socket], [], [], int(self.timeout / 2))
            if not ready[0]:
                self._pingtest(send_ping)
                return None
            self.last_pong = time.time()
            received_bytes = self.socket.recv(1024)
            if not received_bytes:
                print("SOCKET CONNECTION BROKEN")
                raise ExceptionForRestart
            # Split on raw bytes and decode whole lines only, so a multi-byte
            # character cut in half by the 1024-byte read is not mangled.
            pieces = (self.byte_buffer + received_bytes).split(b"\r\n")
            self.byte_buffer = pieces[-1]
            self.line_buffer += [self._decode(piece) for piece in pieces[:-1]]
            if self.line_buffer:
                return self.line_buffer.pop(0)

    def recv_line(self, send_ping=True):
        """Return the next line from the server (printing it), or None."""
        line = self._recv_line_wrapped(send_ping)
        if line:
            print("LINE FROM SERVER " + str(datetime.datetime.now()) + ": " +
                  line)
        return line
96
97
def handle_command(command, argument, notice, target, session):
    """Execute one bot command and report the outcome through notice.

    command  -- command name without the leading "!"; "addquote", "quote",
                "markov" and "twt" are handled, anything else is ignored.
    argument -- text following the command name, possibly "".
    notice   -- callable taking one string; used for every reply.
    target   -- channel or nick the command relates to; the MD5 of it names
                the per-target quotes and markov feed files in session.dbdir.
    session  -- object providing dbdir, twtfile and users_in_chan.
    """
    hash_string = hashlib.md5(target.encode("utf-8")).hexdigest()
    quotesfile_name = session.dbdir + "/quotes_" + hash_string

    def addquote():
        """Append argument to the quotes file and report its quote number."""
        if not os.access(quotesfile_name, os.F_OK):
            with open(quotesfile_name, "w") as quotesfile:
                quotesfile.write("QUOTES FOR " + target + ":\n")
        with open(quotesfile_name, "a") as quotesfile:
            quotesfile.write(argument + "\n")
        with open(quotesfile_name, "r") as quotesfile:
            lines = quotesfile.readlines()
        # The first line is the header, so it does not count as a quote.
        notice("ADDED QUOTE #" + str(len(lines) - 1))

    def quote():
        """Show a random quote, the quote of a given number, or search hits."""

        def show_help():
            notice("SYNTAX: !quote [int] OR !quote search QUERY")
            notice("QUERY may be a boolean grouping of quoted or unquoted " +
                   "search terms, examples:")
            notice("!quote search foo")
            notice("!quote search foo AND (bar OR NOT baz)")
            notice("!quote search \"foo\\\"bar\" AND ('NOT\"' AND \"'foo'\"" +
                   " OR 'bar\\'baz')")

        tokens = argument.split(" ") if argument else []
        # isdecimal() rather than isdigit(): the latter accepts characters
        # such as superscript digits that int() cannot convert.
        if (len(tokens) > 1 and tokens[0] != "search") or \
                (len(tokens) == 1 and
                 (tokens[0] == "search" or not tokens[0].isdecimal())):
            show_help()
            return
        if not os.access(quotesfile_name, os.F_OK):
            notice("NO QUOTES AVAILABLE")
            return
        with open(quotesfile_name, "r") as quotesfile:
            lines = quotesfile.readlines()[1:]  # drop the header line
        if len(tokens) > 1:
            query = " ".join(tokens[1:])
            try:
                results = plomsearch.search(query, lines)
            except plomsearch.LogicParserError as err:
                notice("FAILED QUERY PARSING: " + str(err))
                return
            if len(results) == 0:
                notice("NO QUOTES MATCHING QUERY")
            else:
                for result in results:
                    notice("QUOTE #" + str(result[0] + 1) + " : " + result[1])
            return
        if len(tokens) == 1:
            i = int(tokens[0])
            if i == 0 or i > len(lines):
                notice("THERE'S NO QUOTE OF THAT INDEX")
                return
            i = i - 1
        else:
            if not lines:
                # Header-only file: randrange(0) would raise ValueError.
                notice("NO QUOTES AVAILABLE")
                return
            i = random.randrange(len(lines))
        notice("QUOTE #" + str(i + 1) + ": " + lines[i])

    def markov():
        """Send a Markov-chain text built from the target's message feed."""
        select_length = 2
        sentence_end_markers = ".!?)("

        markovfeed_name = session.dbdir + "/markovfeed_" + hash_string
        if not os.access(markovfeed_name, os.F_OK):
            notice("NOT ENOUGH TEXT TO MARKOV.")
            return

        # Lowercase incoming lines, ensure they end in a sentence end mark.
        with open(markovfeed_name, "r") as feed:
            lines = feed.readlines()
        tokens = []
        for line in lines:
            line = line.lower().replace("\n", "")
            if not line:
                # An empty feed line has no last character to inspect.
                continue
            if line[-1] not in sentence_end_markers:
                line += "."
            tokens += line.split()
        if len(tokens) <= select_length:
            notice("NOT ENOUGH TEXT TO MARKOV.")
            return

        # Replace URLs with escape string for now, so that the Markov selector
        # won't see them as different strings. Stash replaced URLs in urls.
        urls = []
        url_escape = "\nURL"
        url_starts = ["http://", "https://", "<http://", "<https://"]
        for i, token in enumerate(tokens):
            for url_start in url_starts:
                if token.startswith(url_start):
                    length = len(token)
                    if url_start[0] == "<":
                        # Keep whatever trails the closing ">" in the token.
                        closing = token.find(">")
                        if closing >= 0:
                            length = closing + 1
                    urls += [token[:length]]
                    tokens[i] = url_escape + token[length:]
                    break

        # Every run of select_length + 1 consecutive tokens is a selection:
        # select_length tokens of context followed by their continuation.
        selections = [tokens[i:i + select_length + 1]
                      for i in range(len(tokens) - select_length)]

        def pick_next(snippet):
            """Return a continuation token for the context in snippet.

            Prefers selections matching the longest tail of snippet; falls
            back to any selection when nothing matches.
            """
            usable_selections = []
            for depth in range(select_length, 0, -1):
                usable_selections = [
                    selection for selection in selections
                    if all(snippet[-j] == selection[-(j + 1)]
                           for j in range(1, depth + 1))]
                if usable_selections:
                    break
            if not usable_selections:
                usable_selections = selections
            return random.choice(usable_selections)[select_length]

        # Start snippets with the beginning of a sentence, if possible: take
        # the tokens following one that ends in a sentence end mark.
        snippet = [""] * select_length
        random.shuffle(selections)
        for selection in selections:
            if selection[0][-1] in sentence_end_markers:
                snippet = selection[1:]
                break

        # Grow the message token by token. Replace present users' names with
        # malkovich.
        msg = ""
        malkovich = "malkovich"
        while True:
            new_end = pick_next(snippet)
            for name in session.users_in_chan:
                # Skip empty names: they would match every token.
                if name and new_end[:len(name)] == name.lower():
                    new_end = malkovich + new_end[len(name):]
                    break
            if len(msg) + len(new_end) > 200:
                break
            msg += new_end + " "
            snippet = snippet[1:] + [new_end]

        # Replace occurences of url escape string with random choice from urls.
        while url_escape in msg:
            msg = msg.replace(url_escape, random.choice(urls), 1)

        # More meaningful ways to randomly end sentences.
        notice(msg + malkovich + ".")

    def twt():
        """Append argument, prefixed by a UTC timestamp, to the twtxt file."""
        from datetime import datetime, timezone
        try:
            # Append mode creates the file if it does not exist yet.
            twtfile = open(session.twtfile, "a")
        except (PermissionError, FileNotFoundError) as err:
            notice("CAN'T ACCESS OR CREATE TWT FILE: " + str(err))
            return
        # Naive UTC timestamp, same text form as datetime.utcnow() produces.
        stamp = datetime.now(timezone.utc).replace(tzinfo=None).isoformat()
        with twtfile:
            twtfile.write(stamp + "\t" + argument + "\n")
        notice("WROTE TWT.")

    handlers = {
        "addquote": addquote,
        "quote": quote,
        "markov": markov,
        "twt": twt,
    }
    handler = handlers.get(command)
    if handler is not None:
        handler()
300
301
def handle_url(url, notice, show_url=False):
    """Report the HTML title of the page at url through notice.

    url      -- address to fetch.
    notice   -- callable taking one string; used for every reply.
    show_url -- if true, the reply names the URL the title belongs to.

    Status URLs on mobile.twitter.com are rewritten to their twitter.com
    form and handled with show_url=True instead of being fetched as given.
    Request failures are reported through notice, not raised.
    """

    def desktop_twitter_url(url):
        """Return the twitter.com form of a mobile status URL, else None."""
        pattern = r'https?://mobile\.twitter\.com/([^/]+)/status/([^?/]+)'
        m = re.search(pattern, url)
        if m is None:
            return None
        return 'https://twitter.com/' + m.group(1) + '/status/' + m.group(2)

    # Rewrite before fetching, so the mobile page is not downloaded in vain.
    rewritten = desktop_twitter_url(url)
    if rewritten is not None:
        handle_url(rewritten, notice, True)
        return
    try:
        r = requests.get(url, timeout=15)
    except (requests.exceptions.RequestException, UnicodeError) as error:
        # RequestException also covers read timeouts, which would otherwise
        # propagate and take the whole bot down.
        notice("TROUBLE FOLLOWING URL: " + str(error))
        return
    title = bs4.BeautifulSoup(r.text, "html5lib").title
    if title and title.string:
        prefix = "PAGE TITLE: "
        if show_url:
            prefix = "PAGE TITLE FOR <" + url + ">: "
        notice(prefix + title.string.strip())
    else:
        notice("PAGE HAS NO TITLE TAG")
334
335
class Session:
    """One bot session in a single IRC channel.

    Registers with the server, joins the channel, keeps track of the users
    present, logs channel traffic and dispatches messages to the URL and
    command handlers.
    """

    def __init__(self, io, username, nickname, channel, twtfile, dbdir):
        """Register as nickname/username, join channel, prepare the log dir.

        io is the connection object; it must offer send_line() and
        recv_line(). twtfile and dbdir are stored for the command handlers.
        The log directory lives below dbdir and is named after the MD5 of
        the channel name.
        """
        self.io = io
        self.nickname = nickname
        self.channel = channel
        self.users_in_chan = []
        self.twtfile = twtfile
        self.dbdir = dbdir
        self.io.send_line("NICK " + self.nickname)
        self.io.send_line("USER " + username + " 0 * : ")
        self.io.send_line("JOIN " + self.channel)
        hash_string = hashlib.md5(self.channel.encode("utf-8")).hexdigest()
        self.logdir = self.dbdir + "/irclogs_" + hash_string + "/"
        os.makedirs(self.logdir, exist_ok=True)

    def loop(self):
        """Receive and process server lines forever."""

        def log(line):
            """Append line, prefixed by a UTC timestamp, to today's log."""
            now = datetime.datetime.now(datetime.timezone.utc)
            form = "%Y-%m-%d %H:%M:%S UTC\t"
            logfile_name = self.logdir + now.strftime("%Y-%m-%d") + ".txt"
            with open(logfile_name, "a") as logfile:
                logfile.write(now.strftime(form) + " " + line + "\n")

        def handle_privmsg(tokens):
            """Log a PRIVMSG if it went to the channel, then act on it."""

            def handle_input(msg, target):
                """Handle URLs and commands in msg, or feed it to markov."""

                def notice(text):
                    self.io.send_line("NOTICE " + target + " :" + text)

                if not msg:
                    # Nothing to act on; an empty line in the markov feed
                    # would only be noise.
                    return
                for url in re.findall(r"(https?://[^\s>]+)", msg):
                    handle_url(url, notice)
                if msg[0] == "!":
                    command_tokens = msg[1:].split()
                    # A lone "!" names no command at all.
                    if command_tokens:
                        argument = " ".join(command_tokens[1:])
                        handle_command(command_tokens[0], argument, notice,
                                       target, self)
                    return
                hash_string = hashlib.md5(target.encode("utf-8")).hexdigest()
                markovfeed_name = self.dbdir + "/markovfeed_" + hash_string
                with open(markovfeed_name, "a") as feed:
                    feed.write(msg + "\n")

            if len(tokens) < 3:
                # Malformed PRIVMSG without a receiver.
                return
            # Text before the first "!", with any ":" dropped.
            sender = tokens[0].split("!", 1)[0].replace(":", "")
            receiver = tokens[2].split("!", 1)[0].replace(":", "")
            # Answer private messages to their sender, others to the channel.
            target = sender
            if receiver != self.nickname:
                target = receiver
            msg = " ".join(tokens[3:])[1:]  # drop the leading ":"
            if target == self.channel:
                log("<" + sender + "> " + msg)
            handle_input(msg, target)

        def name_from_join_or_part(tokens):
            """Return the nick from a line prefix like ":nick!user@host"."""
            return re.split("[!@]", tokens[0][1:], maxsplit=1)[0]

        def forget_user(name):
            """Remove one entry for name from users_in_chan, if present."""
            if name in self.users_in_chan:
                self.users_in_chan.remove(name)

        while True:
            line = self.io.recv_line()
            if not line:
                continue
            tokens = line.split(" ")
            if len(tokens) < 2:
                continue
            if tokens[0] == "PING":
                self.io.send_line("PONG " + tokens[1])
            elif tokens[1] == "PRIVMSG":
                handle_privmsg(tokens)
            elif tokens[1] == "353":
                # Names reply: the user names start at the sixth token, the
                # first of them carrying the ":" of the trailing parameter.
                raw_names = tokens[5:]
                if raw_names and raw_names[0].startswith(":"):
                    raw_names[0] = raw_names[0][1:]
                # A trailing space on the line yields an empty token.
                names = [name for name in raw_names if name]
                log("PRESENT: " + ", ".join(names))
                # Strip channel status prefixes to get the bare nicks.
                nicks = [name.lstrip("~&@%+") for name in names]
                self.users_in_chan += [nick for nick in nicks if nick]
            elif tokens[1] == "JOIN":
                name = name_from_join_or_part(tokens)
                if name != self.nickname:
                    self.users_in_chan += [name]
                log(line)
            elif tokens[1] in ("PART", "QUIT"):
                # The name may be untracked (e.g. the bot itself leaving).
                forget_user(name_from_join_or_part(tokens))
                log(line)
            else:
                log(line)
439
440
def parse_command_line_arguments():
    """Parse the command line and return the resulting options namespace.

    The namespace has the attributes server, port, timeout, username,
    nickname, twtfile, dbdir and CHANNEL. Arguments the parser does not
    know are ignored.
    """
    parser = argparse.ArgumentParser()
    # Short and long flag are separate option strings; a single string like
    # "-s, --server" registers one literal option and the long form never
    # matches.
    parser.add_argument("-s", "--server", action="store", dest="server",
                        default=SERVER,
                        help="server or server net to connect to (default: "
                        + SERVER + ")")
    parser.add_argument("-p", "--port", action="store", dest="port", type=int,
                        default=PORT, help="port to connect to (default : "
                        + str(PORT) + ")")
    parser.add_argument("-w", "--wait", action="store", dest="timeout",
                        type=int, default=TIMEOUT,
                        help="timeout in seconds after which to attempt " +
                        "reconnect (default: " + str(TIMEOUT) + ")")
    parser.add_argument("-u", "--username", action="store", dest="username",
                        default=USERNAME, help="username to use (default: "
                        + USERNAME + ")")
    parser.add_argument("-n", "--nickname", action="store", dest="nickname",
                        default=NICKNAME, help="nickname to use (default: "
                        + NICKNAME + ")")
    parser.add_argument("-t", "--twtxtfile", action="store", dest="twtfile",
                        default=TWTFILE, help="twtxt file to use (default: "
                        + TWTFILE + ")")
    parser.add_argument("-d", "--dbdir", action="store", dest="dbdir",
                        default=DBDIR, help="directory to store DB files in")
    parser.add_argument("CHANNEL", action="store", help="channel to join")
    opts, _ = parser.parse_known_args()
    return opts
468
469
# Run the bot; reconnect from scratch whenever the connection is given up.
opts = parse_command_line_arguments()
while True:
    # Reset per attempt: if IO() itself fails, there is no socket to close.
    io = None
    try:
        io = IO(opts.server, opts.port, opts.timeout)
        session = Session(io, opts.username, opts.nickname, opts.CHANNEL,
                          opts.twtfile, opts.dbdir)
        session.loop()
    except ExceptionForRestart:
        if io is not None:
            io.socket.close()