home · contact · privacy
Overhaul DB directory structure.
[plomlombot-irc.git] / plomlombot.py
1 #!/usr/bin/python3
2
3 import argparse
4 import socket
5 import datetime
6 import select
7 import time
8 import re
9 import requests
10 import bs4
11 import random
12 import hashlib
13 import os
14 import plomsearch
15
# Fallback settings; every one of them can be replaced on the command line.
SERVER = "irc.freenode.net"  # IRC server (or server net) to connect to
PORT = 6667  # port to connect to
TIMEOUT = 240  # seconds after which a reconnect is attempted
USERNAME = "plomlombot"  # username sent on registration
NICKNAME = USERNAME  # nickname, by default the same as the username
TWTFILE = ""  # twtxt file written to by the bot
DBDIR = os.path.expanduser("~/plomlombot_db")  # directory for DB files
24
25
class ExceptionForRestart(Exception):
    """Raised when the server connection is lost and must be set up anew."""
28
29
class IO:
    """Line-oriented connection to an IRC server over a non-blocking socket.

    Received data is buffered and handed out one CR-LF-terminated line at a
    time. If nothing arrives for half of the timeout, a PING is sent; if
    nothing has arrived for the whole timeout, ExceptionForRestart is raised.
    """

    def __init__(self, server, port, timeout):
        """Connect to server:port and read the server's first line.

        timeout is the number of seconds without incoming data after which
        the connection is considered dead.
        """
        self.timeout = timeout
        self.socket = socket.socket()
        self.socket.connect((server, port))
        self.socket.setblocking(0)
        self.line_buffer = []   # complete lines not yet handed out
        self.rune_buffer = ""   # trailing incomplete line
        self.last_pong = time.time()
        # The first token of the first line, minus its first character, is
        # kept as the server name used in later PINGs.
        # NOTE(review): recv_line() returns None if nothing arrives within
        # timeout / 2 seconds, which would raise AttributeError here.
        self.servername = self.recv_line(send_ping=False).split(" ")[0][1:]

    def _pingtest(self, send_ping=True):
        """Raise ExceptionForRestart on timeout, else optionally send PING."""
        if self.last_pong + self.timeout < time.time():
            print("SERVER NOT ANSWERING")
            raise ExceptionForRestart
        if send_ping:
            self.send_line("PING " + self.servername)

    def send_line(self, msg):
        """Send msg as one line to the server.

        Line breaks inside msg are replaced by spaces. Lines longer than 510
        bytes (UTF-8 encoded, without the line terminator) are not sent.
        Raises ExceptionForRestart if the socket accepts no more data.
        """
        msg = msg.replace("\r", " ")
        msg = msg.replace("\n", " ")
        if len(msg.encode("utf-8")) > 510:
            print("NOT SENT LINE TO SERVER (too long): " + msg)
            return
        print("LINE TO SERVER: "
              + str(datetime.datetime.now()) + ": " + msg)
        # Encode once and count in bytes: socket.send() reports the number
        # of bytes sent, which differs from the number of characters as soon
        # as msg contains non-ASCII text.
        payload = (msg + "\r\n").encode("utf-8")
        total_sent_len = 0
        while total_sent_len < len(payload):
            sent_len = self.socket.send(payload[total_sent_len:])
            if sent_len == 0:
                print("SOCKET CONNECTION BROKEN")
                raise ExceptionForRestart
            total_sent_len += sent_len

    def _recv_line_wrapped(self, send_ping=True):
        """Return the next buffered or received line, or None on time-out.

        Waits up to half of the timeout for data. Raises ExceptionForRestart
        if the peer closed the connection or the timeout was exceeded.
        """
        if self.line_buffer:
            return self.line_buffer.pop(0)
        while True:
            ready = select.select([self.socket], [], [], int(self.timeout / 2))
            if not ready[0]:
                self._pingtest(send_ping)
                return None
            # Any incoming data counts as a sign of life from the server.
            self.last_pong = time.time()
            received_bytes = self.socket.recv(1024)
            try:
                received_runes = received_bytes.decode("UTF-8")
            except UnicodeDecodeError:
                received_runes = received_bytes.decode("latin1")
            if not received_runes:
                print("SOCKET CONNECTION BROKEN")
                raise ExceptionForRestart
            self.rune_buffer += received_runes
            # Everything before the last CR-LF is complete lines; the rest
            # stays in rune_buffer until more data arrives.
            lines_split = self.rune_buffer.split("\r\n")
            self.line_buffer += lines_split[:-1]
            self.rune_buffer = lines_split[-1]
            if self.line_buffer:
                return self.line_buffer.pop(0)

    def recv_line(self, send_ping=True):
        """Return the next line from the server (printing it), or None."""
        line = self._recv_line_wrapped(send_ping)
        if line:
            print("LINE FROM SERVER " + str(datetime.datetime.now()) + ": " +
                  line)
        return line
96
97
def handle_command(command, argument, notice, target, session):
    """Run the bot command named by command, replying through notice.

    command -- command name without the leading "!"; "addquote", "quote",
        "markov" and "twt" are known, anything else is silently ignored
    argument -- text following the command name, may be ""
    notice -- callable taking one string, used to send replies
    target -- name written into the header line of a new quotes file
    session -- object providing the attributes quotesfile, markovfile,
        twtfile and users_in_chan
    """

    def addquote():
        """Append argument to the quotes file and report its number."""
        if not os.access(session.quotesfile, os.F_OK):
            with open(session.quotesfile, "w") as quotesfile:
                quotesfile.write("QUOTES FOR " + target + ":\n")
        with open(session.quotesfile, "a") as quotesfile:
            quotesfile.write(argument + "\n")
        with open(session.quotesfile, "r") as quotesfile:
            lines = quotesfile.readlines()
        # The first line of the file is the header, not a quote.
        notice("ADDED QUOTE #" + str(len(lines) - 1))

    def quote():
        """Reply with a random quote, a numbered quote or search results."""

        def show_help():
            notice("SYNTAX: !quote [int] OR !quote search QUERY")
            notice("QUERY may be a boolean grouping of quoted or unquoted " +
                   "search terms, examples:")
            notice("!quote search foo")
            notice("!quote search foo AND (bar OR NOT baz)")
            notice("!quote search \"foo\\\"bar\" AND ('NOT\"' AND \"'foo'\"" +
                   " OR 'bar\\'baz')")

        tokens = argument.split(" ") if argument else []
        # isdecimal() rather than isdigit(): the latter also accepts
        # characters such as superscript digits, which int() rejects.
        if (len(tokens) > 1 and tokens[0] != "search") or \
                (len(tokens) == 1 and
                 (tokens[0] == "search" or not tokens[0].isdecimal())):
            show_help()
            return
        if not os.access(session.quotesfile, os.F_OK):
            notice("NO QUOTES AVAILABLE")
            return
        with open(session.quotesfile, "r") as quotesfile:
            lines = quotesfile.readlines()[1:]  # skip the header line
        if len(tokens) > 1:
            query = " ".join(tokens[1:])
            try:
                results = plomsearch.search(query, lines)
            except plomsearch.LogicParserError as err:
                notice("FAILED QUERY PARSING: " + str(err))
                return
            if len(results) == 0:
                notice("NO QUOTES MATCHING QUERY")
            else:
                for result in results:
                    notice("QUOTE #" + str(result[0] + 1) + " : " + result[1])
            return
        if len(tokens) == 1:
            i = int(tokens[0])
            if i == 0 or i > len(lines):
                notice("THERE'S NO QUOTE OF THAT INDEX")
                return
            i = i - 1
        else:
            if not lines:
                # File holds only its header; randrange(0) would raise.
                notice("NO QUOTES AVAILABLE")
                return
            i = random.randrange(len(lines))
        notice("QUOTE #" + str(i + 1) + ": " + lines[i])

    def markov():
        """Reply with text randomly chained from the markov feed file."""
        from random import choice, shuffle
        select_length = 2
        selections = []

        def pick_next(snippet):
            """Return a token that followed the end of snippet somewhere.

            Prefers selections matching the last select_length tokens of
            snippet, then shorter matches, then any selection at all.
            """
            usable_selections = []
            for i in range(select_length, 0, -1):
                for selection in selections:
                    if all(snippet[-j] == selection[-(j + 1)]
                           for j in range(1, i + 1)):
                        usable_selections.append(selection)
                if usable_selections:
                    break
            if not usable_selections:
                usable_selections = selections
            return choice(usable_selections)[select_length]

        if not os.access(session.markovfile, os.F_OK):
            notice("NOT ENOUGH TEXT TO MARKOV.")
            return

        # Lowercase incoming lines, ensure they end in a sentence end mark.
        with open(session.markovfile, "r") as feed:
            lines = feed.readlines()
        tokens = []
        sentence_end_markers = ".!?)("
        for line in lines:
            line = line.lower().replace("\n", "")
            if not line:
                continue  # an empty line has no last character to inspect
            if line[-1] not in sentence_end_markers:
                line += "."
            tokens += line.split()
        if len(tokens) <= select_length:
            notice("NOT ENOUGH TEXT TO MARKOV.")
            return

        # Replace URLs with escape string for now, so that the Markov selector
        # won't see them as different strings. Stash replaced URLs in urls.
        urls = []
        url_escape = "\nURL"
        url_starts = ["http://", "https://", "<http://", "<https://"]
        for i, token in enumerate(tokens):
            for url_start in url_starts:
                if token.startswith(url_start):
                    length = len(token)
                    if url_start[0] == "<":
                        try:
                            length = token.index(">") + 1
                        except ValueError:
                            pass
                    urls.append(token[:length])
                    tokens[i] = url_escape + token[length:]
                    break

        # For each snippet of select_length, use pick_next() to find the
        # continuation token from selections. Replace present users' names
        # with malkovich. Start snippets with the beginning of a sentence,
        # if possible.
        for i in range(len(tokens) - select_length):
            selections.append(tokens[i:i + select_length + 1])
        snippet = [""] * select_length
        shuffle(selections)
        for selection in selections:
            if selection[0][-1] in sentence_end_markers:
                # The tokens after a sentence end start a new sentence.
                snippet = selection[1:select_length + 1]
                break
        msg = ""
        malkovich = "malkovich"
        while True:
            new_end = pick_next(snippet)
            for name in session.users_in_chan:
                if new_end[:len(name)] == name.lower():
                    new_end = malkovich + new_end[len(name):]
                    break
            if len(msg) + len(new_end) > 200:
                break
            msg += new_end + " "
            for i in range(select_length - 1):
                snippet[i] = snippet[i + 1]
            snippet[select_length - 1] = new_end

        # Replace occurrences of url escape string with random choice from
        # urls.
        while url_escape in msg:
            msg = msg.replace(url_escape, choice(urls), 1)

        # More meaningful ways to randomly end sentences.
        notice(msg + malkovich + ".")

    def twt():
        """Append argument as a timestamped line to the twtxt file."""

        def try_open(mode):
            try:
                twtfile = open(session.twtfile, mode)
            except (PermissionError, FileNotFoundError) as err:
                notice("CAN'T ACCESS OR CREATE TWT FILE: " + str(err))
                return None
            return twtfile

        from datetime import datetime
        if not os.access(session.twtfile, os.F_OK):
            twtfile = try_open("w")
            if twtfile is None:
                return
            twtfile.close()
        twtfile = try_open("a")
        if twtfile is None:
            return
        with twtfile:
            twtfile.write(datetime.utcnow().isoformat() + "\t" + argument
                          + "\n")
        notice("WROTE TWT.")

    handlers = {
        "addquote": addquote,
        "quote": quote,
        "markov": markov,
        "twt": twt,
    }
    handler = handlers.get(command)
    if handler is not None:
        handler()
296
297
def handle_url(url, notice, show_url=False):
    """Fetch url and report the title of the page found there via notice.

    url -- address to retrieve
    notice -- callable taking one string, used to send the result
    show_url -- if True, the reported title is prefixed with the URL
    """

    def mobile_twitter_hack(url):
        """Handle mobile.twitter.com status URLs via twitter.com instead.

        Returns True if url was such a URL (and has been handled by a
        nested handle_url() call on the rewritten address), else False.
        """
        # One pattern both detects the URL and captures its parts, so a
        # match can never be followed by a failed second match.
        pattern = r"https?://mobile\.twitter\.com/([^/]+)/status/([^?/]+)"
        m = re.search(pattern, url)
        if not m:
            return False
        url = 'https://twitter.com/' + m.group(1) + '/status/' + m.group(2)
        handle_url(url, notice, True)
        return True

    try:
        r = requests.get(url, timeout=15)
    except (requests.exceptions.TooManyRedirects,
            requests.exceptions.ConnectionError,
            requests.exceptions.Timeout,  # e.g. server too slow to answer
            requests.exceptions.InvalidURL,
            UnicodeError,
            requests.exceptions.InvalidSchema) as error:
        notice("TROUBLE FOLLOWING URL: " + str(error))
        return
    if mobile_twitter_hack(url):
        return
    title = bs4.BeautifulSoup(r.text, "html5lib").title
    if title and title.string:
        prefix = "PAGE TITLE: "
        if show_url:
            prefix = "PAGE TITLE FOR <" + url + ">: "
        notice(prefix + title.string.strip())
    else:
        notice("PAGE HAS NO TITLE TAG")
330
331
class Session:
    """One IRC session in one channel: registration, logging, dispatch."""

    def __init__(self, io, username, nickname, channel, twtfile, dbdir):
        """Register with the server, join channel, set up the DB paths.

        io -- connection object providing send_line() and recv_line()
        username, nickname -- identity sent to the server
        channel -- channel to join
        twtfile -- path of the twtxt file
        dbdir -- directory below which the channel's files are kept, in a
            sub-directory named by the MD5 hex digest of the channel name
        """
        self.io = io
        self.nickname = nickname
        self.channel = channel
        self.users_in_chan = []
        self.twtfile = twtfile
        self.dbdir = dbdir
        self.io.send_line("NICK " + self.nickname)
        self.io.send_line("USER " + username + " 0 * : ")
        self.io.send_line("JOIN " + self.channel)
        hash_channel = hashlib.md5(self.channel.encode("utf-8")).hexdigest()
        self.chandir = self.dbdir + "/" + hash_channel + "/"
        self.logdir = self.chandir + "logs/"
        os.makedirs(self.logdir, exist_ok=True)
        self.markovfile = self.chandir + "markovfeed"
        self.quotesfile = self.chandir + "quotes"

    def loop(self):
        """Receive and handle server lines forever.

        Returns only by way of an exception raised further down.
        """

        def log(line):
            """Append line, prefixed with a UTC timestamp, to today's log."""
            now = datetime.datetime.now(datetime.timezone.utc)
            form = "%Y-%m-%d %H:%M:%S UTC\t"
            path = self.logdir + now.strftime("%Y-%m-%d") + ".txt"
            with open(path, "a") as logfile:
                logfile.write(now.strftime(form) + " " + line + "\n")

        def handle_privmsg(tokens):
            """Log a PRIVMSG and react to URLs and commands inside it."""

            def handle_input(msg, target):

                def notice(msg):
                    self.io.send_line("NOTICE " + target + " :" + msg)

                if not msg:
                    # Nothing to look at, and an empty line would be of no
                    # use in the markov feed either.
                    return
                for url in re.findall(r"(https?://[^\s>]+)", msg):
                    handle_url(url, notice)
                if msg.startswith("!"):
                    tokens = msg[1:].split()
                    if tokens:  # a bare "!" names no command
                        argument = " ".join(tokens[1:])
                        handle_command(tokens[0], argument, notice, target,
                                       self)
                    return
                with open(self.markovfile, "a") as file:
                    file.write(msg + "\n")

            # Sender and receiver are the respective tokens up to a "!",
            # with any ":" removed.
            sender = tokens[0].split("!")[0].replace(":", "")
            receiver = tokens[2].split("!")[0].replace(":", "")
            # Messages sent directly to the bot are answered to the sender,
            # everything else to where it was sent to.
            target = sender
            if receiver != self.nickname:
                target = receiver
            msg = " ".join(tokens[3:])[1:]
            if target == self.channel:
                log("<" + sender + "> " + msg)
            handle_input(msg, target)

        def name_from_join_or_part(tokens):
            """Return the name part of the prefix token of a JOIN/PART."""
            token = tokens[0][1:]
            # The name ends at the first "!" or "@"; without either, the
            # whole token is the name.
            for i, rune in enumerate(token):
                if rune in "!@":
                    return token[:i]
            return token

        while True:
            line = self.io.recv_line()
            if not line:
                continue
            tokens = line.split(" ")
            if len(tokens) > 1:
                if tokens[0] == "PING":
                    self.io.send_line("PONG " + tokens[1])
                elif tokens[1] == "PRIVMSG":
                    handle_privmsg(tokens)
                elif tokens[1] == "353":
                    names = tokens[5:]
                    if names:
                        names[0] = names[0][1:]
                    names = [name.replace("@", "").replace("+", "")
                             for name in names]
                    # Drop empty entries, as produced by a trailing space.
                    self.users_in_chan += [name for name in names if name]
                    log(line)
                elif tokens[1] == "JOIN":
                    name = name_from_join_or_part(tokens)
                    if name != self.nickname:
                        self.users_in_chan += [name]
                    log(line)
                elif tokens[1] == "PART":
                    name = name_from_join_or_part(tokens)
                    if name in self.users_in_chan:
                        self.users_in_chan.remove(name)
                    log(line)
                else:
                    log(line)
436
437
def parse_command_line_arguments():
    """Parse the command line and return the resulting options object.

    The returned object has the attributes server, port, timeout, username,
    nickname, twtfile, dbdir and CHANNEL. Unknown arguments are ignored.
    """
    parser = argparse.ArgumentParser()
    # Short and long flag are passed as separate strings so that both are
    # registered as option names.
    parser.add_argument("-s", "--server", action="store", dest="server",
                        default=SERVER,
                        help="server or server net to connect to (default: "
                        + SERVER + ")")
    parser.add_argument("-p", "--port", action="store", dest="port", type=int,
                        default=PORT, help="port to connect to (default : "
                        + str(PORT) + ")")
    parser.add_argument("-w", "--wait", action="store", dest="timeout",
                        type=int, default=TIMEOUT,
                        help="timeout in seconds after which to attempt " +
                        "reconnect (default: " + str(TIMEOUT) + ")")
    parser.add_argument("-u", "--username", action="store", dest="username",
                        default=USERNAME, help="username to use (default: "
                        + USERNAME + ")")
    parser.add_argument("-n", "--nickname", action="store", dest="nickname",
                        default=NICKNAME, help="nickname to use (default: "
                        + NICKNAME + ")")
    parser.add_argument("-t", "--twtxtfile", action="store", dest="twtfile",
                        default=TWTFILE, help="twtxt file to use (default: "
                        + TWTFILE + ")")
    parser.add_argument("-d", "--dbdir", action="store", dest="dbdir",
                        default=DBDIR, help="directory to store DB files in")
    parser.add_argument("CHANNEL", action="store", help="channel to join")
    opts, unknown = parser.parse_known_args()
    return opts
465
466
# Connect and run a session; whenever the connection is reported lost,
# close the socket and start over with a fresh connection.
opts = parse_command_line_arguments()
while True:
    # Reset on every pass so the handler below never touches a connection
    # left over from an earlier pass, or an unbound name.
    io = None
    try:
        io = IO(opts.server, opts.port, opts.timeout)
        # Each server gets its own DB sub-directory, named by the MD5 hex
        # digest of the server name.
        hash_server = hashlib.md5(opts.server.encode("utf-8")).hexdigest()
        dbdir = opts.dbdir + "/" + hash_server
        session = Session(io, opts.username, opts.nickname, opts.CHANNEL,
                          opts.twtfile, dbdir)
        session.loop()
    except ExceptionForRestart:
        if io is not None:
            io.socket.close()
        continue