# Imports required by the code below: standard library modules plus requests,
# bs4, and the local plomsearch and irclog helper modules.
import argparse
import datetime
import hashlib
import os
import random
import re
import select
import signal
import socket
import time

import bs4
import requests

import irclog
import plomsearch

# Defaults, may be overwritten by command line arguments.
SERVER = "irc.freenode.net"
PORT = 6667       # assumed default (standard plaintext IRC port)
TIMEOUT = 240     # assumed default, in seconds
USERNAME = "plomlombot"
NICKNAME = USERNAME  # assumed default: nickname mirrors the username
TWTFILE = ""      # assumed default: no twtxt file unless given on the CLI
DBDIR = os.path.expanduser("~/plomlombot_db")
def write_to_file(path, mode, text):
    f = open(path, mode)
    f.write(text)
    f.close()

class ExceptionForRestart(Exception):
    # Raised on connection trouble; the outermost loop catches it and
    # reconnects.
    pass

class Line:
    # Splits a raw IRC protocol line into tokens, sender, and receiver.

    def __init__(self, line):
        self.line = line
        self.tokens = line.split(" ")
        self.sender = ""
        if self.tokens[0][0] == ":":
            for rune in self.tokens[0][1:]:
                if rune in {"!", "@"}:
                    break
                self.sender += rune
        self.receiver = ""
        if len(self.tokens) > 2:
            for rune in self.tokens[2]:
                if rune in {"!", "@"}:
                    break
                self.receiver += rune
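
# Illustrative sketch (not called anywhere): how Line splits a typical PRIVMSG.
# The nickname, host, and channel are invented for demonstration only.
def _demo_line_parsing():
    line = Line(":alice!~alice@example.org PRIVMSG #somechannel :hello there")
    print(line.sender)    # "alice"
    print(line.receiver)  # "#somechannel"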

class IO:
    # Wraps the server socket: non-blocking reads, "\r\n" line buffering, and
    # a ping test against the connection timeout.

    def __init__(self, server, port, timeout):
        self.timeout = timeout
        self.socket = socket.socket()
        try:
            self.socket.connect((server, port))
        except OSError:  # assumption: any connect failure triggers a restart
            raise ExceptionForRestart
        self.socket.setblocking(0)
        self.line_buffer = []
        self.rune_buffer = ""
        self.last_pong = time.time()
        self.servername = self.recv_line(send_ping=False).split(" ")[0][1:]
    def _pingtest(self, send_ping=True):
        if self.last_pong + self.timeout < time.time():
            print("SERVER NOT ANSWERING")
            raise ExceptionForRestart
        if send_ping:
            self.send_line("PING " + self.servername)
    def send_line(self, msg):
        msg = msg.replace("\r", " ")
        msg = msg.replace("\n", " ")
        if len(msg.encode("utf-8")) > 510:
            print("NOT SENT LINE TO SERVER (too long): " + msg)
            return
        print("LINE TO SERVER: "
              + str(datetime.datetime.now()) + ": " + msg)
        # Encode once and send byte-wise, so multi-byte characters don't break
        # the send offset arithmetic.
        data = bytes(msg + "\r\n", "UTF-8")
        total_sent_len = 0
        while total_sent_len < len(data):
            sent_len = self.socket.send(data[total_sent_len:])
            if sent_len == 0:
                print("SOCKET CONNECTION BROKEN")
                raise ExceptionForRestart
            total_sent_len += sent_len
    def _recv_line_wrapped(self, send_ping=True):
        if len(self.line_buffer) > 0:
            return self.line_buffer.pop(0)
        while True:
            ready = select.select([self.socket], [], [], int(self.timeout / 2))
            if len(ready[0]) == 0:
                self._pingtest(send_ping)
                return None
            self.last_pong = time.time()
            received_bytes = self.socket.recv(1024)
            try:
                received_runes = received_bytes.decode("UTF-8")
            except UnicodeDecodeError:
                received_runes = received_bytes.decode("latin1")
            if len(received_runes) == 0:
                print("SOCKET CONNECTION BROKEN")
                raise ExceptionForRestart
            self.rune_buffer += received_runes
            lines_split = str.split(self.rune_buffer, "\r\n")
            self.line_buffer += lines_split[:-1]
            self.rune_buffer = lines_split[-1]
            if len(self.line_buffer) > 0:
                return self.line_buffer.pop(0)
    def recv_line(self, send_ping=True):
        line = self._recv_line_wrapped(send_ping)
        if line:
            print("LINE FROM SERVER " + str(datetime.datetime.now()) + ": " +
                  line)
        return line
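
# Illustrative sketch (not called anywhere): the "\r\n" buffering scheme used
# by _recv_line_wrapped, shown on a hard-coded chunk instead of socket data.
def _demo_line_buffering():
    line_buffer = []
    rune_buffer = ""
    rune_buffer += ":server NOTICE * :hi\r\nPING :server\r\n:partial"
    lines_split = str.split(rune_buffer, "\r\n")
    line_buffer += lines_split[:-1]  # the two complete lines
    rune_buffer = lines_split[-1]    # ":partial" waits for more bytes
    print(line_buffer, repr(rune_buffer))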

def handle_command(command, argument, notice, target, session):

    def addquote():
        if not os.access(session.quotesfile, os.F_OK):
            write_to_file(session.quotesfile, "w",
                          "QUOTES FOR " + target + ":\n")
        write_to_file(session.quotesfile, "a", argument + "\n")
        quotesfile = open(session.quotesfile, "r")
        lines = quotesfile.readlines()
        quotesfile.close()
        notice("added quote #" + str(len(lines) - 1))
    def quote():

        def help():
            notice("syntax: !quote [int] OR !quote search QUERY")
            notice("QUERY may be a boolean grouping of quoted or unquoted " +
                   "search terms, examples:")
            notice("!quote search foo")
            notice("!quote search foo AND (bar OR NOT baz)")
            notice("!quote search \"foo\\\"bar\" AND ('NOT\"' AND \"'foo'\"" +
                   " OR 'bar\\'baz')")

        if "" == argument:
            tokens = []
        else:
            tokens = argument.split(" ")
        if (len(tokens) > 1 and tokens[0] != "search") or \
                (len(tokens) == 1 and
                 (tokens[0] == "search" or not tokens[0].isdigit())):
            help()
            return
        if not os.access(session.quotesfile, os.F_OK):
            notice("no quotes available")
            return
        quotesfile = open(session.quotesfile, "r")
        lines = quotesfile.readlines()
        quotesfile.close()
        lines = lines[1:]
        if len(tokens) == 1:
            i = int(tokens[0])
            if i == 0 or i > len(lines):
                notice("there's no quote of that index")
                return
            i = i - 1
        elif len(tokens) > 1:
            query = str.join(" ", tokens[1:])
            try:
                results = plomsearch.search(query, lines)
            except plomsearch.LogicParserError as err:
                notice("failed query parsing: " + str(err))
                return
            if len(results) == 0:
                notice("no quotes matching query")
            else:
                if len(results) > 3:
                    notice("showing 3 of " + str(len(results)) + " quotes")
                for result in results[:3]:
                    notice("quote #" + str(result[0] + 1) + ": "
                           + result[1][:-1])
            return
        else:
            i = random.randrange(len(lines))
        notice("quote #" + str(i + 1) + ": " + lines[i][:-1])
    def markov():

        def help():
            notice("syntax: !markov [int]")

        def markov(snippet):
            usable_selections = []
            for i in range(select_length, 0, -1):
                for selection in selections:
                    add = True
                    for j in range(i):
                        j += 1
                        if snippet[-j] != selection[-(j+1)]:
                            add = False
                            break
                    if add:
                        usable_selections += [selection]
                if [] != usable_selections:
                    break
            if [] == usable_selections:
                usable_selections = selections
            selection = choice(usable_selections)
            return selection[select_length]
        if "" == argument:
            tokens = []
        else:
            tokens = argument.split(" ")
        if (len(tokens) > 1 or (len(tokens) == 1 and not tokens[0].isdigit())):
            help()
            return

        from random import choice, shuffle
        select_length = 2  # default chain history length; value assumed
        if len(tokens) == 1:
            n = int(tokens[0])
            if n > 0 and n < 10:  # upper sanity bound assumed
                select_length = n
            else:
                notice("bad value, using default: " + str(select_length))

        if not os.access(session.markovfile, os.F_OK):
            notice("not enough text to markov")
            return
        # Lowercase incoming lines, ensure they end in a sentence end mark.
        file = open(session.markovfile, "r")
        lines = file.readlines()
        file.close()
        tokens = []
        sentence_end_markers = ".!?)("
        for line in lines:
            line = line.lower().replace("\n", "")
            if line[-1] not in sentence_end_markers:
                line += "."
            tokens += line.split()
        if len(tokens) - 1 <= select_length:
            notice("not enough text to markov")
            return
        # Replace URLs with escape string for now, so that the Markov selector
        # won't see them as different strings. Stash replaced URLs in urls.
        urls = []
        url_escape = "\nURL"  # placeholder token; exact original value assumed
        url_starts = ["http://", "https://", "<http://", "<https://"]
        for i in range(len(tokens)):
            for url_start in url_starts:
                if tokens[i][:len(url_start)] == url_start:
                    length = len(tokens[i])
                    if url_start[0] == "<":
                        try:
                            length = tokens[i].index(">") + 1
                        except ValueError:
                            pass
                    urls += [tokens[i][:length]]
                    tokens[i] = url_escape + tokens[i][length:]
                    break
        # For each snippet of select_length, use markov() to find continuation
        # token from selections. Replace present users' names with malkovich.
        # Start snippets with the beginning of a sentence, if possible.
        selections = []
        for i in range(len(tokens) - select_length):
            token_list = []
            for j in range(select_length + 1):
                token_list += [tokens[i + j]]
            selections += [token_list]
        snippet = []
        for i in range(select_length):
            snippet += [""]
        shuffle(selections)
        for i in range(len(selections)):
            if selections[i][0][-1] in sentence_end_markers:
                for j in range(select_length):
                    snippet[j] = selections[i][j + 1]
                break
        msg = ""
        malkovich = "malkovich"
        while 1:
            new_end = markov(snippet)
            for name in session.users_in_chan:
                if new_end[:len(name)] == name.lower():
                    new_end = malkovich + new_end[len(name):]
                    break
            if len(msg) + len(new_end) > 200:
                break
            msg += new_end + " "
            for i in range(select_length - 1):
                snippet[i] = snippet[i + 1]
            snippet[select_length - 1] = new_end
        # Replace occurrences of url escape string with random choice from urls.
        while True:
            index = msg.find(url_escape)
            if index < 0:
                break
            msg = msg.replace(url_escape, choice(urls), 1)

        # TODO: more meaningful ways to randomly end sentences.
        notice(msg + malkovich + ".")
    def twt():

        def try_open(mode):
            try:
                twtfile = open(session.twtfile, mode)
            except (PermissionError, FileNotFoundError) as err:
                notice("can't access or create twt file: " + str(err))
                return None
            return twtfile

        from datetime import datetime
        if not os.access(session.twtfile, os.F_OK):
            twtfile = try_open("w")
            if twtfile is None:
                return
            twtfile.close()
        twtfile = try_open("a")
        if twtfile is None:
            return
        twtfile.write(datetime.utcnow().isoformat() + "\t" + argument + "\n")
        twtfile.close()
    if "addquote" == command:
        addquote()
    elif "quote" == command:
        quote()
    elif "markov" == command:
        markov()
    elif "twt" == command:
        twt()
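
# Illustrative sketch (not called anywhere): exercising handle_command with
# print() standing in for the IRC notice callback and a minimal stand-in for a
# Session object; the class and paths below are invented for demonstration.
def _demo_handle_command():
    class _FakeSession:
        quotesfile = "/tmp/demo_quotes"
        markovfile = "/tmp/demo_markovfeed"
        twtfile = "/tmp/demo_twtxt"
        users_in_chan = []
    handle_command("addquote", "<alice> hello world", print, "#somechannel",
                   _FakeSession())
    handle_command("quote", "1", print, "#somechannel", _FakeSession())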

def handle_url(url, notice, show_url=False):

    def mobile_twitter_hack(url):
        re1 = r'https?://(mobile.twitter.com/)[^/]+(/status/)'
        re2 = r'https?://mobile.twitter.com/([^/]+)/status/([^\?/]+)'
        m = re.search(re1, url)
        if m and m.group(1) == 'mobile.twitter.com/' \
                and m.group(2) == '/status/':
            m = re.search(re2, url)
            url = 'https://twitter.com/' + m.group(1) + '/status/' + m.group(2)
            handle_url(url, notice, True)
            return True

    class TimeOut(Exception):
        pass

    def timeout_handler(ignore1, ignore2):
        raise TimeOut("timeout")

    signal.signal(signal.SIGALRM, timeout_handler)
    signal.alarm(15)  # seconds before the fetch is aborted; value assumed
    try:
        r = requests.get(url, headers={'User-Agent': 'plomlombot'},
                         stream=True)
        r.raw.decode_content = True
        text = r.raw.read(10000000+1)
        if len(text) > 10000000:
            raise ValueError('Too large a response')
    except (requests.exceptions.TooManyRedirects,
            requests.exceptions.ConnectionError,
            requests.exceptions.InvalidURL,
            UnicodeError,
            ValueError,
            TimeOut,
            requests.exceptions.InvalidSchema) as error:
        signal.alarm(0)
        notice("trouble following url: " + str(error))
        return False
    signal.alarm(0)
    if mobile_twitter_hack(url):
        return True
    title = bs4.BeautifulSoup(text, "html5lib").title
    if title and title.string:
        prefix = "page title: "
        if show_url:
            prefix = "page title for <" + url + ">: "
        notice(prefix + title.string.strip())
    else:
        notice("page has no title tag")
    return True
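
# Illustrative sketch (not called anywhere): handle_url with print() standing
# in for the IRC notice callback; the URL is an example, not from the bot.
def _demo_handle_url():
    handle_url("https://example.org/", print)        # "page title: ..."
    handle_url("https://example.org/", print, True)  # prefixes the URL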

class Session:

    def __init__(self, io, username, nickname, channel, twtfile, dbdir,
                 rmlogs):
        self.io = io
        self.nickname = nickname
        self.username = username
        self.channel = channel
        self.users_in_chan = []
        self.twtfile = twtfile
        self.dbdir = dbdir
        self.rmlogs = rmlogs
        self.io.send_line("NICK " + self.nickname)
        self.io.send_line("USER " + self.username + " 0 * : ")
        self.io.send_line("JOIN " + self.channel)
        hash_channel = hashlib.md5(self.channel.encode("utf-8")).hexdigest()
        self.chandir = self.dbdir + "/" + hash_channel + "/"
        self.rawlogdir = self.chandir + "raw_logs/"
        self.logdir = self.chandir + "logs/"
        if not os.path.exists(self.logdir):
            os.makedirs(self.logdir)
        if not os.path.exists(self.rawlogdir):
            os.makedirs(self.rawlogdir)
        self.markovfile = self.chandir + "markovfeed"
        self.quotesfile = self.chandir + "quotes"
    def loop(self):

        def log(line):
            if type(line) == str:
                line = Line(":" + self.nickname + "!~" + self.username +
                            "@localhost" + " " + line)
            now = datetime.datetime.utcnow()
            form = "%Y-%m-%d %H:%M:%S UTC\t"
            write_to_file(self.rawlogdir + now.strftime("%Y-%m-%d") + ".txt",
                          "a", now.strftime(form) + " " + line.line + "\n")
            to_log = irclog.format_logline(line, self.channel)
            if to_log != None:
                write_to_file(self.logdir + now.strftime("%Y-%m-%d") + ".txt",
                              "a", now.strftime(form) + " " + to_log + "\n")
        def handle_privmsg(line):

            def notice(msg):
                line = "NOTICE " + target + " :" + msg
                self.io.send_line(line)
                log(line)

            target = line.sender
            if line.receiver != self.nickname:
                target = line.receiver
            msg = str.join(" ", line.tokens[3:])[1:]
            matches = re.findall(r"(https?://[^\s>]+)", msg)
            url_count = 0
            for i in range(len(matches)):
                if handle_url(matches[i], notice):
                    url_count += 1
                    if url_count == 3:  # per-message URL limit; value assumed
                        notice("maximum number of urls to parse per message "
                               "reached")
                        break
            if "!" == msg[0]:
                tokens = msg[1:].split()
                argument = str.join(" ", tokens[1:])
                handle_command(tokens[0], argument, notice, target, self)
                return
            write_to_file(self.markovfile, "a", msg + "\n")
        now = datetime.datetime.utcnow()
        write_to_file(self.logdir + now.strftime("%Y-%m-%d") + ".txt", "a",
                      "-----------------------\n")
        while True:
            if self.rmlogs > 0:
                for f in os.listdir(self.logdir):
                    f = os.path.join(self.logdir, f)
                    if os.path.isfile(f) and \
                            os.stat(f).st_mtime < time.time() - self.rmlogs:
                        os.remove(f)
            line = self.io.recv_line()
            if not line:
                continue
            line = Line(line)
            log(line)
            if len(line.tokens) > 1:
                if line.tokens[0] == "PING":
                    self.io.send_line("PONG " + line.tokens[1])
                elif line.tokens[1] == "PRIVMSG":
                    handle_privmsg(line)
                # 353 is RPL_NAMREPLY: the list of nicknames in the channel.
                elif line.tokens[1] == "353":
                    names = line.tokens[5:]
                    names[0] = names[0][1:]
                    for i in range(len(names)):
                        names[i] = names[i].replace("@", "").replace("+", "")
                    self.users_in_chan += names
                elif line.tokens[1] == "JOIN" and line.sender != self.nickname:
                    self.users_in_chan += [line.sender]
                elif line.tokens[1] == "PART":
                    del(self.users_in_chan[self.users_in_chan.index(line.sender)])
                elif line.tokens[1] == "NICK":
                    del(self.users_in_chan[self.users_in_chan.index(line.sender)])
                    self.users_in_chan += [line.receiver]
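
# Illustrative sketch (not called anywhere): how a 353 (RPL_NAMREPLY) line is
# reduced to bare nicknames; the server, nicknames, and channel are invented.
def _demo_names_reply():
    line = Line(":irc.example.org 353 plomlombot = #somechannel "
                ":plomlombot @alice +bob carol")
    names = line.tokens[5:]
    names[0] = names[0][1:]  # drop the leading ":"
    names = [n.replace("@", "").replace("+", "") for n in names]
    print(names)  # ['plomlombot', 'alice', 'bob', 'carol']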

def parse_command_line_arguments():
    parser = argparse.ArgumentParser()
    parser.add_argument("-s", "--server", action="store", dest="server",
                        default=SERVER,
                        help="server or server net to connect to (default: "
                        + SERVER + ")")
    parser.add_argument("-p", "--port", action="store", dest="port", type=int,
                        default=PORT, help="port to connect to (default: "
                        + str(PORT) + ")")
    parser.add_argument("-w", "--wait", action="store", dest="timeout",
                        type=int, default=TIMEOUT,
                        help="timeout in seconds after which to attempt "
                        "reconnect (default: " + str(TIMEOUT) + ")")
    parser.add_argument("-u", "--username", action="store", dest="username",
                        default=USERNAME, help="username to use (default: "
                        + USERNAME + ")")
    parser.add_argument("-n", "--nickname", action="store", dest="nickname",
                        default=NICKNAME, help="nickname to use (default: "
                        + NICKNAME + ")")
    parser.add_argument("-t", "--twtxtfile", action="store", dest="twtfile",
                        default=TWTFILE, help="twtxt file to use (default: "
                        + TWTFILE + ")")
    parser.add_argument("-d", "--dbdir", action="store", dest="dbdir",
                        default=DBDIR, help="directory to store DB files in")
    parser.add_argument("-r", "--rmlogs", action="store", dest="rmlogs",
                        type=int, default=0,
                        help="maximum age in seconds for logfiles in logs/ "
                        "(0 means: never delete, and is default)")
    parser.add_argument("CHANNEL", action="store", help="channel to join")
    opts, unknown = parser.parse_known_args()
    return opts
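
# Illustrative invocation (assuming this file is saved as plomlombot.py; the
# server and channel names below are examples):
#
#   python3 plomlombot.py -s irc.libera.chat -n mybot "#mychannel"
#   python3 plomlombot.py -t ~/twtxt.txt -r 604800 "#mychannel"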

opts = parse_command_line_arguments()
while True:
    try:
        io = IO(opts.server, opts.port, opts.timeout)
        hash_server = hashlib.md5(opts.server.encode("utf-8")).hexdigest()
        dbdir = opts.dbdir + "/" + hash_server
        session = Session(io, opts.username, opts.nickname, opts.CHANNEL,
                          opts.twtfile, dbdir, opts.rmlogs)
        session.loop()
    except ExceptionForRestart:
        io.socket.close()