home · contact · privacy
Rewrite log system.
[plomlombot-irc.git] / plomlombot.py
1 #!/usr/bin/python3
2
3 import argparse
4 import socket
5 import datetime
6 import select
7 import time
8 import re
9 import requests
10 import bs4
11 import random
12 import hashlib
13 import os
14 import signal
15 import plomsearch
16 import irclog
17
# Defaults, may be overwritten by command line arguments.
SERVER = "irc.freenode.net"  # server to connect to (-s)
PORT = 6667  # port to connect to (-p)
TIMEOUT = 240  # seconds after which to attempt a reconnect (-w)
USERNAME = "plomlombot"  # username to use (-u)
NICKNAME = USERNAME  # nickname to use (-n)
TWTFILE = ""  # twtxt file to use (-t); empty by default
DBDIR = os.path.expanduser("~/plomlombot_db")  # directory for DB files (-d)
26
27
def write_to_file(path, mode, text):
    """Write text to the file at path, opened with the given mode.

    The file is closed again even if writing fails.
    """
    with open(path, mode) as f:
        f.write(text)
32
33
class ExceptionForRestart(Exception):
    """Raised when the server connection is unusable and must be redone."""
36
37
class Line:
    """A raw IRC line, split into tokens, with sender and receiver names.

    Attributes:
        line: the raw line as passed in.
        tokens: the line split on single spaces.
        sender: if the first token starts with ":", the text between that
            colon and the first "!" or "@"; otherwise "".
        receiver: the third token up to its first "!" or "@", with every ":"
            removed; "" if there are fewer than three tokens.
    """

    def __init__(self, line):
        self.line = line
        self.tokens = line.split(" ")
        self.sender = ""
        # startswith() rather than [0][0]: an empty line, or one starting
        # with a space, has an empty first token.
        if self.tokens[0].startswith(":"):
            self.sender = self._name_part(self.tokens[0][1:])
        self.receiver = ""
        if len(self.tokens) > 2:
            self.receiver = self._name_part(self.tokens[2]).replace(":", "")

    @staticmethod
    def _name_part(token):
        """Return token cut off before its first "!" or "@"."""
        for i, rune in enumerate(token):
            if rune in {"!", "@"}:
                return token[:i]
        return token
56
57
class Log:
    """Writes daily raw and formatted log files below a channel directory.

    Raw protocol lines go to <chandir>raw_logs/YYYY-MM-DD.txt, lines formatted
    by irclog.format_logline() to <chandir>logs/YYYY-MM-DD.txt (UTC dates).
    """

    def __init__(self, chandir, nickname, username, channel, rmlogs):
        """Store the settings and create both log directories if missing.

        chandir is used as a string prefix and therefore must end in "/".
        rmlogs is the maximum age in seconds of files in logs/ (see
        rmlogs()); 0 or less disables deletion.
        """
        self.nickname = nickname
        self.username = username
        self.channel = channel
        self.chandir = chandir
        self.rmlogcycle = rmlogs
        self.rawlogdir = chandir + "raw_logs/"
        self.logdir = chandir + "logs/"
        # exist_ok avoids the race between an existence check and creation.
        os.makedirs(self.logdir, exist_ok=True)
        os.makedirs(self.rawlogdir, exist_ok=True)

    def log(self, line, sent=False):
        """Append line to today's raw log and, if loggable, formatted log.

        line is a raw line string or an already parsed Line. With sent=True
        it must be a string; it is then logged as coming from this bot
        (sender set to the own nickname, identity username@localhost).
        """
        identity = ""
        separator = " > "
        if sent:
            separator = " < "
            line = Line(line)
            line.sender = self.nickname
            identity = self.username + "@localhost"
        elif isinstance(line, str):
            line = Line(line)
        now = datetime.datetime.now(datetime.timezone.utc)
        form = "%Y-%m-%d %H:%M:%S UTC"
        day_file = now.strftime("%Y-%m-%d") + ".txt"
        write_to_file(self.rawlogdir + day_file, "a",
                      now.strftime(form) + separator + line.line + "\n")
        to_log = irclog.format_logline(line, self.channel, identity)
        if to_log is not None:
            write_to_file(self.logdir + day_file, "a",
                          now.strftime(form) + " " + to_log + "\n")

    def rmlogs(self):
        """Delete files in logs/ last modified more than rmlogcycle seconds ago.

        Does nothing if rmlogcycle is not positive. raw_logs/ is not touched.
        """
        if self.rmlogcycle <= 0:
            return
        oldest_allowed = time.time() - self.rmlogcycle
        for name in os.listdir(self.logdir):
            path = os.path.join(self.logdir, name)
            if os.path.isfile(path) and os.stat(path).st_mtime < oldest_allowed:
                os.remove(path)

    def separator_line(self):
        """Append a line of dashes to today's formatted log file."""
        now = datetime.datetime.now(datetime.timezone.utc)
        write_to_file(self.logdir + now.strftime("%Y-%m-%d") + ".txt", "a",
                      "-----------------------\n")
105
106
class IO:
    """Line-oriented connection to an IRC server over a non-blocking socket.

    Attributes:
        log: object whose log(line, sent) method gets every line sent or
            received, or None for no logging.
        timeout: seconds without any server traffic after which the
            connection is considered dead.
        servername: first token of the first line received, minus its
            leading character.
    """

    def __init__(self, server, port, timeout):
        """Connect to server:port and wait for the first line from it.

        Raises ExceptionForRestart if connecting times out or the server
        stays silent for longer than timeout; the socket is closed then.
        """
        self.log = None
        self.timeout = timeout
        self.socket = socket.socket()
        try:
            self.socket.connect((server, port))
        except TimeoutError:
            # The caller never gets hold of this object, so close here.
            self.socket.close()
            raise ExceptionForRestart
        self.socket.setblocking(0)
        self.line_buffer = []
        self.byte_buffer = b""
        self.last_pong = time.time()
        try:
            # recv_line() returns None whenever select() times out quietly;
            # keep waiting until _pingtest() gives up on the server.
            first_line = self.recv_line(send_ping=False)
            while first_line is None:
                first_line = self.recv_line(send_ping=False)
        except ExceptionForRestart:
            self.socket.close()
            raise
        self.servername = first_line.split(" ")[0][1:]

    def _pingtest(self, send_ping=True):
        """Raise ExceptionForRestart if the server was silent too long.

        Otherwise, if send_ping is set, send a PING to provoke an answer.
        """
        if self.last_pong + self.timeout < time.time():
            print("SERVER NOT ANSWERING")
            raise ExceptionForRestart
        if send_ping:
            self.send_line("PING " + self.servername)

    def send_line(self, msg):
        """Send msg as one line; line breaks inside msg become spaces.

        Messages longer than 510 bytes (UTF-8) are reported on stdout and
        dropped. Raises ExceptionForRestart if the socket accepts no data.
        """
        msg = msg.replace("\r", " ")
        msg = msg.replace("\n", " ")
        encoded = msg.encode("utf-8")
        if len(encoded) > 510:
            print("NOT SENT LINE TO SERVER (too long): " + msg)
            return
        print("LINE TO SERVER: "
              + str(datetime.datetime.now()) + ": " + msg)
        if self.log is not None:
            self.log.log(msg, True)
        # Count and slice in bytes: socket.send() reports bytes, which
        # differ from character counts for non-ASCII text.
        data = encoded + b"\r\n"
        total_sent_len = 0
        while total_sent_len < len(data):
            sent_len = self.socket.send(data[total_sent_len:])
            if sent_len == 0:
                print("SOCKET CONNECTION BROKEN")
                raise ExceptionForRestart
            total_sent_len += sent_len

    @staticmethod
    def _decode(raw):
        """Decode one received line as UTF-8, falling back to latin1."""
        try:
            return raw.decode("UTF-8")
        except UnicodeDecodeError:
            return raw.decode("latin1")

    def _recv_line_wrapped(self, send_ping=True):
        """Return the next complete line from the server, without "\\r\\n".

        Returns None if nothing arrived within half the timeout; in that
        case _pingtest(send_ping) has been run. Raises ExceptionForRestart
        if the connection was closed.
        """
        if self.line_buffer:
            return self.line_buffer.pop(0)
        while True:
            ready = select.select([self.socket], [], [], int(self.timeout / 2))
            if not ready[0]:
                self._pingtest(send_ping)
                return None
            self.last_pong = time.time()
            received_bytes = self.socket.recv(1024)
            if len(received_bytes) == 0:
                print("SOCKET CONNECTION BROKEN")
                raise ExceptionForRestart
            # Buffer bytes and decode only complete lines, so a multi-byte
            # character cut in two by recv() is not mis-decoded.
            self.byte_buffer += received_bytes
            lines_split = self.byte_buffer.split(b"\r\n")
            self.line_buffer += [self._decode(raw) for raw in lines_split[:-1]]
            self.byte_buffer = lines_split[-1]
            if self.line_buffer:
                return self.line_buffer.pop(0)

    def recv_line(self, send_ping=True):
        """Return the next line (or None), logging and printing non-empty ones."""
        line = self._recv_line_wrapped(send_ping)
        if line:
            if self.log is not None:
                self.log.log(line)
            print("LINE FROM SERVER " + str(datetime.datetime.now()) + ": " +
                  line)
        return line
181
182
def handle_command(command, argument, notice, target, session):
    """Run one bot command and report its result through notice.

    Arguments:
        command: "addquote", "quote", "markov" or "twt"; anything else is
            silently ignored.
        argument: text following the command, "" if there was none.
        notice: callable taking one string; used for all replies.
        target: name written into the header of a newly created quotes file.
        session: object providing the paths quotesfile, markovfile and
            twtfile, and the name list users_in_chan.
    """

    def read_lines(path):
        with open(path, "r") as f:
            return f.readlines()

    def argument_tokens():
        if "" == argument:
            return []
        return argument.split(" ")

    def addquote():
        if not os.access(session.quotesfile, os.F_OK):
            write_to_file(session.quotesfile, "w",
                          "QUOTES FOR " + target + ":\n")
        write_to_file(session.quotesfile, "a", argument + "\n")
        # The first line of the file is the header, not a quote.
        notice("added quote #" + str(len(read_lines(session.quotesfile)) - 1))

    def quote():

        def help():
            notice("syntax: !quote [int] OR !quote search QUERY")
            notice("QUERY may be a boolean grouping of quoted or unquoted " +
                   "search terms, examples:")
            notice("!quote search foo")
            notice("!quote search foo AND (bar OR NOT baz)")
            notice("!quote search \"foo\\\"bar\" AND ('NOT\"' AND \"'foo'\"" +
                   " OR 'bar\\'baz')")

        tokens = argument_tokens()
        # isdecimal(), not isdigit(): the latter also accepts characters
        # like superscript digits, which int() rejects.
        if (len(tokens) > 1 and tokens[0] != "search") or \
            (len(tokens) == 1 and
                (tokens[0] == "search" or not tokens[0].isdecimal())):
            help()
            return
        if not os.access(session.quotesfile, os.F_OK):
            notice("no quotes available")
            return
        lines = read_lines(session.quotesfile)[1:]  # drop the header line
        if len(tokens) == 1:
            i = int(tokens[0])
            if i == 0 or i > len(lines):
                notice("there's no quote of that index")
                return
            i = i - 1
        elif len(tokens) > 1:
            query = str.join(" ", tokens[1:])
            try:
                results = plomsearch.search(query, lines)
            except plomsearch.LogicParserError as err:
                notice("failed query parsing: " + str(err))
                return
            if len(results) == 0:
                notice("no quotes matching query")
            else:
                if len(results) > 3:
                    notice("showing 3 of " + str(len(results)) + " quotes")
                for result in results[:3]:
                    notice("quote #" + str(result[0] + 1) + ": "
                           + result[1][:-1])
            return
        else:
            if not lines:
                # randrange(0) would raise on a header-only file.
                notice("no quotes available")
                return
            i = random.randrange(len(lines))
        notice("quote #" + str(i + 1) + ": " + lines[i][:-1])

    def markov():
        from random import choice, shuffle

        def help():
            notice("syntax: !markov [integer from 1 to infinite]")

        def continuation(snippet):
            # Prefer selections whose leading tokens match as long a tail of
            # snippet as possible; fall back to any selection at all.
            usable_selections = []
            for i in range(select_length, 0, -1):
                usable_selections = [
                    selection for selection in selections
                    if all(snippet[-j] == selection[-(j + 1)]
                           for j in range(1, i + 1))]
                if usable_selections:
                    break
            if not usable_selections:
                usable_selections = selections
            return choice(usable_selections)[select_length]

        tokens = argument_tokens()
        if len(tokens) > 1 or \
                (len(tokens) == 1 and not tokens[0].isdecimal()):
            help()
            return

        select_length = 2
        if len(tokens) == 1:
            n = int(tokens[0])
            if n > 0:
                select_length = n
            else:
                notice("bad value, using default: " + str(select_length))

        if not os.access(session.markovfile, os.F_OK):
            notice("not enough text to markov for selection length")
            return

        # Lowercase incoming lines, ensure they end in a sentence end mark.
        tokens = []
        sentence_end_markers = ".!?)("
        for line in read_lines(session.markovfile):
            line = line.lower().replace("\n", "")
            if not line:
                continue  # blank line: nothing to index into
            if line[-1] not in sentence_end_markers:
                line += "."
            tokens += line.split()
        if len(tokens) - 1 <= select_length:
            notice("not enough text to markov")
            return

        # Replace URLs with escape string for now, so that the Markov selector
        # won't see them as different strings. Stash replaced URLs in urls.
        urls = []
        url_escape = "\nURL"
        url_starts = ["http://", "https://", "<http://", "<https://"]
        for i, token in enumerate(tokens):
            for url_start in url_starts:
                if token[:len(url_start)] == url_start:
                    length = len(token)
                    if url_start[0] == "<":
                        try:
                            length = token.index(">") + 1
                        except ValueError:
                            pass  # no closing ">": the whole token is the URL
                    urls += [token[:length]]
                    tokens[i] = url_escape + token[length:]
                    break

        # Build all token runs of select_length + 1. Start the snippet with
        # the beginning of a sentence, if possible: take the tokens following
        # the first (after shuffling) token that ends a sentence.
        selections = [tokens[i:i + select_length + 1]
                      for i in range(len(tokens) - select_length)]
        snippet = [""] * select_length
        shuffle(selections)
        for selection in selections:
            if selection[0][-1] in sentence_end_markers:
                snippet = selection[1:]
                break

        # Repeatedly find a continuation token for the snippet. Replace
        # present users' names with malkovich.
        msg = ""
        malkovich = "malkovich"
        while True:
            new_end = continuation(snippet)
            for name in session.users_in_chan:
                # An empty name would match every token.
                if name and new_end[:len(name)] == name.lower():
                    new_end = malkovich + new_end[len(name):]
                    break
            if len(msg) + len(new_end) > 200:
                break
            msg += new_end + " "
            snippet = snippet[1:] + [new_end]

        # Replace occurences of url escape string with random choice from urls.
        while url_escape in msg:
            msg = msg.replace(url_escape, choice(urls), 1)

        # More meaningful ways to randomly end sentences.
        notice(msg + malkovich + ".")

    def twt():
        from datetime import datetime
        try:
            # Append mode also creates the file if it does not exist yet.
            twtfile = open(session.twtfile, "a")
        except (PermissionError, FileNotFoundError) as err:
            notice("can't access or create twt file: " + str(err))
            return
        with twtfile:
            twtfile.write(datetime.utcnow().isoformat() + "\t" + argument
                          + "\n")
        notice("wrote twt.")

    if "addquote" == command:
        addquote()
    elif "quote" == command:
        quote()
    elif "markov" == command:
        markov()
    elif "twt" == command:
        twt()
399
400
def handle_url(url, notice, show_url=False):
    """Fetch url and report the title of the page found there via notice.

    Arguments:
        url: the URL to fetch.
        notice: callable taking one string; used for all output.
        show_url: if true, name the URL in the title message.

    Fetching is aborted after 15 seconds (via SIGALRM) or if the response
    exceeds 10000000 bytes. A mobile.twitter.com status URL is, after being
    fetched, handled again as its twitter.com equivalent instead.

    Returns False if the URL could not be fetched (a notice says why), else
    True.
    """

    def mobile_twitter_hack(url):
        # One pattern with escaped dots; a URL without a status ID simply
        # does not match instead of failing on a missing match object.
        m = re.search(
            r'https?://mobile\.twitter\.com/([^/]+)/status/([^?/]+)', url)
        if not m:
            return False
        url = 'https://twitter.com/' + m.group(1) + '/status/' + m.group(2)
        handle_url(url, notice, True)
        return True

    class TimeOut(Exception):
        pass

    def timeout_handler(ignore1, ignore2):
        raise TimeOut("timeout")

    signal.signal(signal.SIGALRM, timeout_handler)
    signal.alarm(15)
    response = None
    try:
        response = requests.get(url, headers={'User-Agent': 'plomlombot'},
                                stream=True)
        response.raw.decode_content = True
        # Read one byte more than allowed to detect over-long responses.
        text = response.raw.read(10000000+1)
        if len(text) > 10000000:
            raise ValueError('Too large a response')
    except (requests.exceptions.TooManyRedirects,
            requests.exceptions.ConnectionError,
            requests.exceptions.InvalidURL,
            TimeOut,
            UnicodeError,
            ValueError,
            requests.exceptions.InvalidSchema) as error:
        # Disarm before sending the notice, so the alarm cannot fire there.
        signal.alarm(0)
        notice("trouble following url: " + str(error))
        return False
    finally:
        # Also reached when an exception not listed above propagates: never
        # leave the alarm pending or the streamed connection open.
        signal.alarm(0)
        if response is not None:
            response.close()
    if mobile_twitter_hack(url):
        return True
    title = bs4.BeautifulSoup(text, "html5lib").title
    if title and title.string:
        prefix = "page title: "
        if show_url:
            prefix = "page title for <" + url + ">: "
        notice(prefix + title.string.strip())
    else:
        notice("page has no title tag")
    return True
450
451
class Session:
    """The bot's presence in one channel on one server connection.

    Attributes:
        io: connection object used to send and receive lines.
        nickname: the bot's own nickname.
        users_in_chan: names seen in the channel, maintained from 353, JOIN,
            PART, QUIT and NICK messages.
        twtfile, markovfile, quotesfile: paths of the bot's data files.
        log: the Log that io reports all further traffic to.
    """

    def __init__(self, io, username, nickname, channel, twtfile, dbdir, rmlogs):
        """Register with the server, join channel and start logging.

        Channel data lives in dbdir/<md5 hex digest of channel>/.
        """
        self.io = io
        self.nickname = nickname
        self.users_in_chan = []
        self.twtfile = twtfile
        hash_channel = hashlib.md5(channel.encode("utf-8")).hexdigest()
        chandir = dbdir + "/" + hash_channel + "/"
        self.markovfile = chandir + "markovfeed"
        self.quotesfile = chandir + "quotes"
        self.log = Log(chandir, self.nickname, username, channel, rmlogs)
        self.io.send_line("NICK " + self.nickname)
        self.io.send_line("USER " + username + " 0 * : ")
        self.io.send_line("JOIN " + channel)
        # Logging is only switched on after the three lines above are sent.
        self.io.log = self.log
        self.log.separator_line()

    def _forget_user(self, name):
        """Remove name from users_in_chan, if it is in there."""
        if name in self.users_in_chan:
            self.users_in_chan.remove(name)

    def _handle_privmsg(self, line):
        """React to a PRIVMSG: follow URLs, run a "!" command or feed markov."""

        def notice(msg):
            self.io.send_line("NOTICE " + target + " :" + msg)

        # Answer in the channel, or to the sender if addressed directly.
        target = line.sender
        if line.receiver != self.nickname:
            target = line.receiver
        msg = str.join(" ", line.tokens[3:])[1:]
        if not msg:
            return  # empty message: nothing to look at
        matches = re.findall(r"(https?://[^\s>]+)", msg)
        url_count = 0
        for match in matches:
            if handle_url(match, notice):
                url_count += 1
                if url_count == 3:
                    notice("maximum number of urls to parse per message "
                           "reached")
                    break
        if "!" == msg[0]:
            tokens = msg[1:].split()
            if tokens:  # a bare "!" names no command
                argument = str.join(" ", tokens[1:])
                handle_command(tokens[0], argument, notice, target, self)
            return
        write_to_file(self.markovfile, "a", msg + "\n")

    def _handle_line(self, line):
        """Act on one parsed line from the server."""
        if len(line.tokens) <= 1:
            return
        command = line.tokens[1]
        if line.tokens[0] == "PING":
            self.io.send_line("PONG " + line.tokens[1])
        elif command == "PRIVMSG":
            self._handle_privmsg(line)
        elif command == "353":
            names = line.tokens[5:]
            if names:
                names[0] = names[0][1:]  # drop the ":" starting the list
            names = [name.replace("@", "").replace("+", "") for name in names]
            # A trailing space in the reply yields an empty token; skip it.
            self.users_in_chan += [name for name in names if name]
        elif command == "JOIN" and line.sender != self.nickname:
            self.users_in_chan += [line.sender]
        elif command in ("PART", "QUIT"):
            self._forget_user(line.sender)
        elif command == "NICK":
            self._forget_user(line.sender)
            self.users_in_chan += [line.receiver]

    def loop(self):
        """Receive and handle lines forever, pruning old logs on each round."""
        while True:
            self.log.rmlogs()
            line = self.io.recv_line()
            if not line:
                continue
            self._handle_line(Line(line))
522
523
def parse_command_line_arguments(argv=None):
    """Parse the bot's command line options and return them as a namespace.

    Arguments:
        argv: list of argument strings to parse; None (the default) makes
            argparse use the process's own command line.

    The result has the attributes server, port, timeout, username, nickname,
    twtfile, dbdir, rmlogs and CHANNEL. Unknown arguments are ignored.
    """
    parser = argparse.ArgumentParser()
    # Short and long form are separate option strings; written as one
    # string ("-s, --server"), the long form is never recognised.
    parser.add_argument("-s", "--server", action="store", dest="server",
                        default=SERVER,
                        help="server or server net to connect to (default: "
                        + SERVER + ")")
    parser.add_argument("-p", "--port", action="store", dest="port", type=int,
                        default=PORT, help="port to connect to (default : "
                        + str(PORT) + ")")
    parser.add_argument("-w", "--wait", action="store", dest="timeout",
                        type=int, default=TIMEOUT,
                        help="timeout in seconds after which to attempt "
                        "reconnect (default: " + str(TIMEOUT) + ")")
    parser.add_argument("-u", "--username", action="store", dest="username",
                        default=USERNAME, help="username to use (default: "
                        + USERNAME + ")")
    parser.add_argument("-n", "--nickname", action="store", dest="nickname",
                        default=NICKNAME, help="nickname to use (default: "
                        + NICKNAME + ")")
    parser.add_argument("-t", "--twtxtfile", action="store", dest="twtfile",
                        default=TWTFILE, help="twtxt file to use (default: "
                        + TWTFILE + ")")
    parser.add_argument("-d", "--dbdir", action="store", dest="dbdir",
                        default=DBDIR, help="directory to store DB files in")
    parser.add_argument("-r", "--rmlogs", action="store", dest="rmlogs",
                        type=int, default=0,
                        help="maximum age in seconds for logfiles in logs/ "
                        "(0 means: never delete, and is default)")
    parser.add_argument("CHANNEL", action="store", help="channel to join")
    opts, unknown = parser.parse_known_args(argv)
    return opts
555
556
# Connect and run the bot; reconnect whenever the connection is given up.
opts = parse_command_line_arguments()
while True:
    # Reset on every attempt: if IO() itself fails, the handler below must
    # neither hit an unbound name nor a connection from an earlier round.
    io = None
    try:
        io = IO(opts.server, opts.port, opts.timeout)
        # Keep the data of different servers in separate directories.
        hash_server = hashlib.md5(opts.server.encode("utf-8")).hexdigest()
        dbdir = opts.dbdir + "/" + hash_server
        session = Session(io, opts.username, opts.nickname, opts.CHANNEL,
                          opts.twtfile, dbdir, opts.rmlogs)
        session.loop()
    except ExceptionForRestart:
        if io is not None:
            io.socket.close()
        continue