home · contact · privacy
Improve help text.
[plomlombot-irc.git] / plomlombot.py
1 #!/usr/bin/python3
2
3 import argparse
4 import socket
5 import datetime
6 import select
7 import time
8 import re
9 import requests
10 import bs4
11 import random
12 import hashlib
13 import os
14 import signal
15 import plomsearch
16 import irclog
17
# Built-in defaults. Each of them can be overridden by the matching command
# line option, see parse_command_line_arguments().
SERVER = "irc.freenode.net"  # IRC server to connect to
PORT = 6667                  # TCP port on that server
TIMEOUT = 4 * 60             # seconds of server silence before reconnecting
USERNAME = "plomlombot"      # name sent in the USER command
NICKNAME = USERNAME          # nick sent in the NICK command
TWTFILE = ""                 # path of the twtxt file written by !twt
DBDIR = os.path.expanduser("~/plomlombot_db")  # root of logs/quotes/markov
26
27
def write_to_file(path, mode, text):
    """Write text to the file at path, opened with the given mode.

    mode is passed straight to open(), e.g. "w" to truncate or "a" to append.
    The file is closed even if writing raises.
    """
    with open(path, mode) as f:
        f.write(text)
32
33
class ExceptionForRestart(Exception):
    """Raised when the server connection is lost and must be re-established."""
36
37
class Line:
    """One IRC protocol line, split into tokens with sender and receiver.

    Attributes:
        line: the raw line as passed in.
        tokens: the line split on single spaces.
        sender: the part of a leading ":prefix" token before any "!" or "@",
            or "" if the line does not start with a prefix.
        receiver: the third token up to any "!" or "@", with ":" characters
            removed, or "" if the line has fewer than three tokens.
    """

    def __init__(self, line):
        self.line = line
        self.tokens = line.split(" ")
        self.sender = ""
        # startswith() instead of indexing: the first token is empty when the
        # line is empty or begins with a space.
        if self.tokens[0].startswith(":"):
            self.sender = self._cut_at_host_marker(self.tokens[0][1:])
        self.receiver = ""
        if len(self.tokens) > 2:
            cut = self._cut_at_host_marker(self.tokens[2])
            self.receiver = cut.replace(":", "")

    @staticmethod
    def _cut_at_host_marker(text):
        """Return text up to (excluding) its first "!" or "@"."""
        for i, rune in enumerate(text):
            if rune in {"!", "@"}:
                return text[:i]
        return text
56
57
class IO:
    """Line-based sending and receiving over a socket to an IRC server.

    Incoming bytes are decoded and split on "\\r\\n" into whole lines; an
    incomplete trailing line is kept in rune_buffer until the rest arrives.
    The time of the last received data is tracked in last_pong so that a
    silent server can be detected.
    """

    def __init__(self, server, port, timeout):
        """Connect to server:port and read the server name from its first line.

        timeout is the number of seconds without any data from the server
        after which the connection is considered dead.

        Raises ExceptionForRestart if connecting times out or if no line
        arrives from the server within the first wait period.
        """
        self.timeout = timeout
        self.socket = socket.socket()
        try:
            self.socket.connect((server, port))
        except TimeoutError:
            self.socket.close()
            raise ExceptionForRestart
        self.socket.setblocking(0)
        self.line_buffer = []
        self.rune_buffer = ""
        self.last_pong = time.time()
        first_line = self.recv_line(send_ping=False)
        if first_line is None:
            # recv_line() returns None when the wait timed out; there is no
            # server name to parse then, so ask for a reconnect.
            self.socket.close()
            raise ExceptionForRestart
        self.servername = first_line.split(" ")[0][1:]

    def _pingtest(self, send_ping=True):
        """Raise ExceptionForRestart if the server was silent for too long.

        Otherwise, if send_ping is set, send a PING to the server.
        """
        if self.last_pong + self.timeout < time.time():
            print("SERVER NOT ANSWERING")
            raise ExceptionForRestart
        if send_ping:
            self.send_line("PING " + self.servername)

    def send_line(self, msg):
        """Send msg as a single line terminated by "\\r\\n".

        Line breaks inside msg are replaced by spaces. A message longer than
        510 bytes in UTF-8 is reported on stdout and not sent.

        Raises ExceptionForRestart if the socket accepts no more data.
        """
        msg = msg.replace("\r", " ").replace("\n", " ")
        if len(msg.encode("utf-8")) > 510:
            print("NOT SENT LINE TO SERVER (too long): " + msg)
            return
        print("LINE TO SERVER: "
              + str(datetime.datetime.now()) + ": " + msg)
        # Encode once and count in bytes: socket.send() reports bytes sent,
        # which differs from the character count for non-ASCII text.
        data = (msg + "\r\n").encode("utf-8")
        total_sent_len = 0
        while total_sent_len < len(data):
            sent_len = self.socket.send(data[total_sent_len:])
            if sent_len == 0:
                print("SOCKET CONNECTION BROKEN")
                raise ExceptionForRestart
            total_sent_len += sent_len

    def _recv_line_wrapped(self, send_ping=True):
        """Return the next complete line, or None if the wait timed out.

        Waits up to half of self.timeout for data. On a timed-out wait,
        _pingtest() is run (which may raise ExceptionForRestart).

        Raises ExceptionForRestart if the server closed the connection.
        """
        if self.line_buffer:
            return self.line_buffer.pop(0)
        while True:
            ready = select.select([self.socket], [], [], int(self.timeout / 2))
            if not ready[0]:
                self._pingtest(send_ping)
                return None
            self.last_pong = time.time()
            received_bytes = self.socket.recv(1024)
            try:
                received_runes = received_bytes.decode("UTF-8")
            except UnicodeDecodeError:
                # Fall back to an encoding that can decode any byte sequence.
                received_runes = received_bytes.decode("latin1")
            if len(received_runes) == 0:
                print("SOCKET CONNECTION BROKEN")
                raise ExceptionForRestart
            self.rune_buffer += received_runes
            lines_split = self.rune_buffer.split("\r\n")
            # Everything but the last element is a complete line; the last
            # element is the (possibly empty) start of the next line.
            self.line_buffer += lines_split[:-1]
            self.rune_buffer = lines_split[-1]
            if self.line_buffer:
                return self.line_buffer.pop(0)

    def recv_line(self, send_ping=True):
        """Like _recv_line_wrapped(), but also print non-empty lines."""
        line = self._recv_line_wrapped(send_ping)
        if line:
            print("LINE FROM SERVER " + str(datetime.datetime.now()) + ": " +
                  line)
        return line
127
128
def handle_command(command, argument, notice, target, session):
    """Run the bot command named by command; unknown commands are ignored.

    Arguments:
        command: command name without the leading "!"; one of "addquote",
            "quote", "markov" or "twt".
        argument: the rest of the message after the command name, as one
            string ("" if there is none).
        notice: callable taking one string; used for all replies.
        target: name written into the header of a new quotes file.
        session: object providing the attributes quotesfile, markovfile,
            twtfile (file paths) and users_in_chan (list of nicks).
    """

    def read_lines(path):
        """Return all lines of the text file at path."""
        with open(path, "r") as f:
            return f.readlines()

    def addquote():
        """Append argument to the quotes file and report its number."""
        if not os.access(session.quotesfile, os.F_OK):
            with open(session.quotesfile, "w") as f:
                f.write("QUOTES FOR " + target + ":\n")
        with open(session.quotesfile, "a") as f:
            f.write(argument + "\n")
        # The first line of the file is the header, not a quote.
        notice("added quote #" + str(len(read_lines(session.quotesfile)) - 1))

    def quote():
        """Show a random quote, the quote of a given number, or search."""

        def show_help():
            notice("syntax: !quote [int] OR !quote search QUERY")
            notice("QUERY may be a boolean grouping of quoted or unquoted " +
                   "search terms, examples:")
            notice("!quote search foo")
            notice("!quote search foo AND (bar OR NOT baz)")
            notice("!quote search \"foo\\\"bar\" AND ('NOT\"' AND \"'foo'\"" +
                   " OR 'bar\\'baz')")

        tokens = argument.split(" ") if argument else []
        # isdecimal() rather than isdigit(): the latter also accepts
        # characters such as superscript digits, which int() rejects.
        if (len(tokens) > 1 and tokens[0] != "search") or \
            (len(tokens) == 1 and
                (tokens[0] == "search" or not tokens[0].isdecimal())):
            show_help()
            return
        if not os.access(session.quotesfile, os.F_OK):
            notice("no quotes available")
            return
        lines = read_lines(session.quotesfile)[1:]  # drop the header line
        if len(tokens) == 1:
            i = int(tokens[0])
            if i == 0 or i > len(lines):
                notice("there's no quote of that index")
                return
            i = i - 1
        elif len(tokens) > 1:
            query = " ".join(tokens[1:])
            try:
                results = plomsearch.search(query, lines)
            except plomsearch.LogicParserError as err:
                notice("failed query parsing: " + str(err))
                return
            if len(results) == 0:
                notice("no quotes matching query")
            else:
                if len(results) > 3:
                    notice("showing 3 of " + str(len(results)) + " quotes")
                for result in results[:3]:
                    notice("quote #" + str(result[0] + 1) + ": "
                           + result[1][:-1])
            return
        else:
            if not lines:
                # The file exists but holds nothing besides the header.
                notice("no quotes available")
                return
            i = random.randrange(len(lines))
        notice("quote #" + str(i + 1) + ": " + lines[i][:-1])

    def markov():
        """Build a random message from the markov file and send it."""

        def show_help():
            notice("syntax: !markov [integer from 1 to infinite]")

        def pick_continuation(snippet):
            """Return a token to follow snippet, chosen from selections.

            Prefers selections whose tokens before the last one match the
            longest possible tail of snippet; if none matches even the last
            token of snippet, any selection may be chosen.
            """
            usable_selections = []
            for depth in range(select_length, 0, -1):
                usable_selections = [
                    selection for selection in selections
                    if snippet[-depth:] == selection[-(depth + 1):-1]]
                if usable_selections:
                    break
            if not usable_selections:
                usable_selections = selections
            return random.choice(usable_selections)[select_length]

        tokens = argument.split(" ") if argument else []
        if len(tokens) > 1 or \
                (len(tokens) == 1 and not tokens[0].isdecimal()):
            show_help()
            return

        select_length = 2
        if len(tokens) == 1:
            n = int(tokens[0])
            if n > 0:
                select_length = n
            else:
                notice("bad value, using default: " + str(select_length))

        if not os.access(session.markovfile, os.F_OK):
            notice("not enough text to markov for selection length")
            return

        # Lowercase incoming lines, ensure they end in a sentence end mark.
        sentence_end_markers = ".!?)("
        tokens = []
        for line in read_lines(session.markovfile):
            line = line.lower().replace("\n", "")
            if not line:
                continue  # an empty line has no last character to inspect
            if line[-1] not in sentence_end_markers:
                line += "."
            tokens += line.split()
        if len(tokens) - 1 <= select_length:
            notice("not enough text to markov")
            return

        # Replace URLs with escape string for now, so that the Markov selector
        # won't see them as different strings. Stash replaced URLs in urls.
        urls = []
        url_escape = "\nURL"
        url_starts = ["http://", "https://", "<http://", "<https://"]
        for i, token in enumerate(tokens):
            for url_start in url_starts:
                if token.startswith(url_start):
                    length = len(token)
                    if url_start[0] == "<":
                        try:
                            length = token.index(">") + 1
                        except ValueError:
                            pass
                    urls.append(token[:length])
                    tokens[i] = url_escape + token[length:]
                    break

        # Collect every run of select_length + 1 consecutive tokens.
        selections = [tokens[i:i + select_length + 1]
                      for i in range(len(tokens) - select_length)]

        # Start the snippet with the beginning of a sentence, if possible:
        # use the tokens following a token that ends a sentence.
        snippet = [""] * select_length
        random.shuffle(selections)
        for selection in selections:
            if selection[0][-1] in sentence_end_markers:
                snippet = list(selection[1:])
                break

        # Repeatedly append a continuation token for the current snippet.
        # Replace present users' names with malkovich.
        msg = ""
        malkovich = "malkovich"
        while True:
            new_end = pick_continuation(snippet)
            for name in session.users_in_chan:
                # Skip empty names, which would match every token.
                if name and new_end[:len(name)] == name.lower():
                    new_end = malkovich + new_end[len(name):]
                    break
            if len(msg) + len(new_end) > 200:
                break
            msg += new_end + " "
            snippet = snippet[1:] + [new_end]

        # Replace occurrences of url escape string with random choice from
        # urls.
        while url_escape in msg:
            msg = msg.replace(url_escape, random.choice(urls), 1)

        notice(msg + malkovich + ".")

    def twt():
        """Append argument with a UTC timestamp to the twtxt file."""
        try:
            # Append mode creates the file if it does not exist yet.
            with open(session.twtfile, "a") as twtfile:
                twtfile.write(datetime.datetime.utcnow().isoformat() + "\t" +
                              argument + "\n")
        except OSError as err:
            notice("can't access or create twt file: " + str(err))
            return
        notice("wrote twt.")

    handlers = {
        "addquote": addquote,
        "quote": quote,
        "markov": markov,
        "twt": twt,
    }
    handler = handlers.get(command)
    if handler is not None:
        handler()
345
346
def handle_url(url, notice, show_url=False):
    """Fetch url and report the page's title via notice.

    Arguments:
        url: the URL to fetch.
        notice: callable taking one string; used for all replies.
        show_url: if set, name the URL in the title message.

    The fetch is aborted via SIGALRM after 15 seconds, and responses larger
    than 10000000 bytes are rejected. URLs of mobile.twitter.com statuses are
    additionally re-handled as their twitter.com equivalent, whose title is
    reported instead.

    Returns True if the URL could be fetched, False if fetching failed (in
    which case the problem is reported via notice).
    """

    def mobile_twitter_hack(url):
        """Handle the twitter.com version of a mobile.twitter.com status URL.

        Returns True if url was such a status URL, else False.
        """
        m = re.search(
            r'https?://mobile\.twitter\.com/([^/]+)/status/([^?/]+)', url)
        if m is None:
            return False
        url = 'https://twitter.com/' + m.group(1) + '/status/' + m.group(2)
        handle_url(url, notice, True)
        return True

    class TimeOut(Exception):
        pass

    def timeout_handler(ignore1, ignore2):
        raise TimeOut("timeout")

    signal.signal(signal.SIGALRM, timeout_handler)
    signal.alarm(15)
    try:
        try:
            r = requests.get(url, headers={'User-Agent': 'plomlombot'},
                             stream=True)
            try:
                r.raw.decode_content = True
                # Read one byte more than allowed to detect oversized bodies.
                text = r.raw.read(10000000 + 1)
            finally:
                # A streamed response keeps its connection until closed.
                r.close()
            if len(text) > 10000000:
                raise ValueError('Too large a response')
        finally:
            # Disarm the alarm on every exit path, and before any reply is
            # sent, so that it cannot fire outside of the fetch.
            signal.alarm(0)
    except (requests.exceptions.RequestException,
            TimeOut,
            UnicodeError,
            ValueError) as error:
        notice("trouble following url: " + str(error))
        return False
    if mobile_twitter_hack(url):
        return True
    title = bs4.BeautifulSoup(text, "html5lib").title
    if title and title.string:
        prefix = "page title: "
        if show_url:
            prefix = "page title for <" + url + ">: "
        notice(prefix + title.string.strip())
    else:
        notice("page has no title tag")
    return True
396
397
class Session:
    """A bot session in one channel on one server connection.

    On creation, registers with the server, joins the channel and makes sure
    the channel's data directories exist. loop() then processes incoming
    lines until the connection breaks.
    """

    def __init__(self, io, username, nickname, channel, twtfile, dbdir, rmlogs):
        """Register with the server via io and set up the channel's files.

        Arguments:
            io: connection object used for sending and receiving lines.
            username: name sent in the USER command.
            nickname: nick sent in the NICK command.
            channel: channel to join.
            twtfile: path of the twtxt file written by the twt command.
            dbdir: directory below which the channel's data is stored, in a
                sub-directory named after the MD5 hex digest of the channel.
            rmlogs: maximum age in seconds of files in the logs directory;
                values of 0 or less disable deletion.
        """
        self.io = io
        self.nickname = nickname
        self.username = username
        self.channel = channel
        self.users_in_chan = []
        self.twtfile = twtfile
        self.dbdir = dbdir
        self.rmlogs = rmlogs
        self.io.send_line("NICK " + self.nickname)
        self.io.send_line("USER " + self.username + " 0 * : ")
        self.io.send_line("JOIN " + self.channel)
        hash_channel = hashlib.md5(self.channel.encode("utf-8")).hexdigest()
        self.chandir = self.dbdir + "/" + hash_channel + "/"
        self.rawlogdir = self.chandir + "raw_logs/"
        self.logdir = self.chandir + "logs/"
        os.makedirs(self.logdir, exist_ok=True)
        os.makedirs(self.rawlogdir, exist_ok=True)
        self.markovfile = self.chandir + "markovfeed"
        self.quotesfile = self.chandir + "quotes"

    def loop(self):
        """Receive and process lines from the server, forever.

        Every line is logged. PINGs are answered, channel messages are
        checked for URLs and commands, and the list of users in the channel
        is kept up to date. Only leaves by way of an exception.
        """

        def log(line):
            """Write line to the raw log and, if formattable, the log.

            line is either a Line received from the server or a string sent
            by the bot itself; the latter is turned into a Line with the
            bot's own sender prefix.
            """
            if isinstance(line, str):
                line = Line(":" + self.nickname + "!~" + self.username +
                            "@localhost" + " " + line)
            now = datetime.datetime.utcnow()
            form = "%Y-%m-%d %H:%M:%S UTC\t"
            day_file = now.strftime("%Y-%m-%d") + ".txt"
            write_to_file(self.rawlogdir + day_file,
                          "a", now.strftime(form) + " " + line.line + "\n")
            to_log = irclog.format_logline(line, self.channel)
            if to_log is not None:
                write_to_file(self.logdir + day_file,
                              "a", now.strftime(form) + " " + to_log + "\n")

        def forget_user(nick):
            """Remove nick from users_in_chan, if it is listed there."""
            if nick in self.users_in_chan:
                self.users_in_chan.remove(nick)

        def handle_privmsg(line):
            """React to URLs and commands in a PRIVMSG line."""

            def notice(msg):
                line = "NOTICE " + target + " :" + msg
                self.io.send_line(line)
                log(line)

            # Reply in the channel, or to the sender of a private message.
            target = line.sender
            if line.receiver != self.nickname:
                target = line.receiver
            msg = " ".join(line.tokens[3:])[1:]
            url_count = 0
            for url in re.findall(r"(https?://[^\s>]+)", msg):
                if handle_url(url, notice):
                    url_count += 1
                    if url_count == 3:
                        notice("maximum number of urls to parse per message "
                               "reached")
                        break
            if msg.startswith("!"):
                tokens = msg[1:].split()
                # A bare "!" carries no command name.
                if tokens:
                    argument = " ".join(tokens[1:])
                    handle_command(tokens[0], argument, notice, target, self)
                return
            if msg:
                write_to_file(self.markovfile, "a", msg + "\n")

        now = datetime.datetime.utcnow()
        write_to_file(self.logdir + now.strftime("%Y-%m-%d") + ".txt", "a",
                      "-----------------------\n")
        while True:
            if self.rmlogs > 0:
                # Delete log files not modified within the last rmlogs
                # seconds.
                for f in os.listdir(self.logdir):
                    f = os.path.join(self.logdir, f)
                    if os.path.isfile(f) and \
                            os.stat(f).st_mtime < time.time() - self.rmlogs:
                        os.remove(f)
            line = self.io.recv_line()
            if not line:
                continue
            line = Line(line)
            log(line)
            if len(line.tokens) > 1:
                if line.tokens[0] == "PING":
                    self.io.send_line("PONG " + line.tokens[1])
                elif line.tokens[1] == "PRIVMSG":
                    handle_privmsg(line)
                elif line.tokens[1] == "353":
                    # Names list: names start at the sixth token, the first
                    # of them prefixed with ":".
                    names = line.tokens[5:]
                    if names:
                        names[0] = names[0][1:]
                    names = [name.replace("@", "").replace("+", "")
                             for name in names]
                    self.users_in_chan += [name for name in names if name]
                elif line.tokens[1] == "JOIN" and line.sender != self.nickname:
                    self.users_in_chan += [line.sender]
                elif line.tokens[1] in {"PART", "QUIT"}:
                    forget_user(line.sender)
                elif line.tokens[1] == "NICK":
                    forget_user(line.sender)
                    self.users_in_chan += [line.receiver]
498
499
def parse_command_line_arguments():
    """Parse the command line and return the resulting options.

    The returned namespace has the attributes server, port, timeout,
    username, nickname, twtfile, dbdir, rmlogs and CHANNEL. Unknown arguments
    are ignored.
    """
    parser = argparse.ArgumentParser()
    # Short and long flags are passed as separate option strings; a single
    # string like "-s, --server" would register one option of that literal
    # name, leaving "--server" unknown.
    parser.add_argument("-s", "--server", action="store", dest="server",
                        default=SERVER,
                        help="server or server net to connect to (default: "
                        + SERVER + ")")
    parser.add_argument("-p", "--port", action="store", dest="port", type=int,
                        default=PORT, help="port to connect to (default : "
                        + str(PORT) + ")")
    parser.add_argument("-w", "--wait", action="store", dest="timeout",
                        type=int, default=TIMEOUT,
                        help="timeout in seconds after which to attempt "
                        "reconnect (default: " + str(TIMEOUT) + ")")
    parser.add_argument("-u", "--username", action="store", dest="username",
                        default=USERNAME, help="username to use (default: "
                        + USERNAME + ")")
    parser.add_argument("-n", "--nickname", action="store", dest="nickname",
                        default=NICKNAME, help="nickname to use (default: "
                        + NICKNAME + ")")
    parser.add_argument("-t", "--twtxtfile", action="store", dest="twtfile",
                        default=TWTFILE, help="twtxt file to use (default: "
                        + TWTFILE + ")")
    parser.add_argument("-d", "--dbdir", action="store", dest="dbdir",
                        default=DBDIR, help="directory to store DB files in")
    parser.add_argument("-r", "--rmlogs", action="store", dest="rmlogs",
                        type=int, default=0,
                        help="maximum age in seconds for logfiles in logs/ "
                        "(0 means: never delete, and is default)")
    parser.add_argument("CHANNEL", action="store", help="channel to join")
    opts, unknown = parser.parse_known_args()
    return opts
531
532
# Connect and run a session; whenever the connection breaks, start over with
# a fresh connection and session.
opts = parse_command_line_arguments()
while True:
    # Reset per attempt, so that a failure inside IO() itself does not touch
    # an unbound name or the socket of an earlier connection.
    io = None
    try:
        io = IO(opts.server, opts.port, opts.timeout)
        # Keep each server's data in its own sub-directory of the DB dir.
        hash_server = hashlib.md5(opts.server.encode("utf-8")).hexdigest()
        dbdir = opts.dbdir + "/" + hash_server
        session = Session(io, opts.username, opts.nickname, opts.CHANNEL,
                          opts.twtfile, dbdir, opts.rmlogs)
        session.loop()
    except ExceptionForRestart:
        if io is not None:
            io.socket.close()
        continue