17 # Defaults, may be overwritten by command line arguments.
18 SERVER = "irc.freenode.net"
21 USERNAME = "plomlombot"
24 DBDIR = os.path.expanduser("~/plomlombot_db")
27 def write_to_file(path, mode, text):
33 class ExceptionForRestart(Exception):
39 def __init__(self, line):
41 self.tokens = line.split(" ")
43 if self.tokens[0][0] == ":":
44 for rune in self.tokens[0][1:]:
45 if rune in {"!", "@"}:
49 if len(self.tokens) > 2:
50 for rune in self.tokens[2]:
51 if rune in {"!", "@"}:
59 def __init__(self, server, port, timeout):
60 self.timeout = timeout
61 self.socket = socket.socket()
63 self.socket.connect((server, port))
65 raise ExceptionForRestart
66 self.socket.setblocking(0)
69 self.last_pong = time.time()
70 self.servername = self.recv_line(send_ping=False).split(" ")[0][1:]
72 def _pingtest(self, send_ping=True):
73 if self.last_pong + self.timeout < time.time():
74 print("SERVER NOT ANSWERING")
75 raise ExceptionForRestart
77 self.send_line("PING " + self.servername)
79 def send_line(self, msg):
80 msg = msg.replace("\r", " ")
81 msg = msg.replace("\n", " ")
82 if len(msg.encode("utf-8")) > 510:
83 print("NOT SENT LINE TO SERVER (too long): " + msg)
84 print("LINE TO SERVER: "
85 + str(datetime.datetime.now()) + ": " + msg)
89 while total_sent_len < msg_len:
90 sent_len = self.socket.send(bytes(msg[total_sent_len:], "UTF-8"))
92 print("SOCKET CONNECTION BROKEN")
93 raise ExceptionForRestart
94 total_sent_len += sent_len
96 def _recv_line_wrapped(self, send_ping=True):
97 if len(self.line_buffer) > 0:
98 return self.line_buffer.pop(0)
100 ready = select.select([self.socket], [], [], int(self.timeout / 2))
102 self._pingtest(send_ping)
104 self.last_pong = time.time()
105 received_bytes = self.socket.recv(1024)
107 received_runes = received_bytes.decode("UTF-8")
108 except UnicodeDecodeError:
109 received_runes = received_bytes.decode("latin1")
110 if len(received_runes) == 0:
111 print("SOCKET CONNECTION BROKEN")
112 raise ExceptionForRestart
113 self.rune_buffer += received_runes
114 lines_split = str.split(self.rune_buffer, "\r\n")
115 self.line_buffer += lines_split[:-1]
116 self.rune_buffer = lines_split[-1]
117 if len(self.line_buffer) > 0:
118 return self.line_buffer.pop(0)
120 def recv_line(self, send_ping=True):
121 line = self._recv_line_wrapped(send_ping)
123 print("LINE FROM SERVER " + str(datetime.datetime.now()) + ": " +
128 def handle_command(command, argument, notice, target, session):
131 if not os.access(session.quotesfile, os.F_OK):
132 write_to_file(session.quotesfile, "w",
133 "QUOTES FOR " + target + ":\n")
134 write_to_file(session.quotesfile, "a", argument + "\n")
135 quotesfile = open(session.quotesfile, "r")
136 lines = quotesfile.readlines()
138 notice("ADDED QUOTE #" + str(len(lines) - 1))
143 notice("SYNTAX: !quote [int] OR !quote search QUERY")
144 notice("QUERY may be a boolean grouping of quoted or unquoted " +
145 "search terms, examples:")
146 notice("!quote search foo")
147 notice("!quote search foo AND (bar OR NOT baz)")
148 notice("!quote search \"foo\\\"bar\" AND ('NOT\"' AND \"'foo'\"" +
154 tokens = argument.split(" ")
155 if (len(tokens) > 1 and tokens[0] != "search") or \
156 (len(tokens) == 1 and
157 (tokens[0] == "search" or not tokens[0].isdigit())):
160 if not os.access(session.quotesfile, os.F_OK):
161 notice("NO QUOTES AVAILABLE")
163 quotesfile = open(session.quotesfile, "r")
164 lines = quotesfile.readlines()
169 if i == 0 or i > len(lines):
170 notice("THERE'S NO QUOTE OF THAT INDEX")
173 elif len(tokens) > 1:
174 query = str.join(" ", tokens[1:])
176 results = plomsearch.search(query, lines)
177 except plomsearch.LogicParserError as err:
178 notice("FAILED QUERY PARSING: " + str(err))
180 if len(results) == 0:
181 notice("NO QUOTES MATCHING QUERY")
184 notice("SHOWING 3 OF " + str(len(results)) + " QUOTES")
185 for result in results[:3]:
186 notice("QUOTE #" + str(result[0] + 1) + ": "
190 i = random.randrange(len(lines))
191 notice("QUOTE #" + str(i + 1) + ": " + lines[i][:-1])
194 from random import choice, shuffle
199 usable_selections = []
200 for i in range(select_length, 0, -1):
201 for selection in selections:
205 if snippet[-j] != selection[-(j+1)]:
209 usable_selections += [selection]
210 if [] != usable_selections:
212 if [] == usable_selections:
213 usable_selections = selections
214 selection = choice(usable_selections)
215 return selection[select_length]
217 if not os.access(session.markovfile, os.F_OK):
218 notice("NOT ENOUGH TEXT TO MARKOV.")
221 # Lowercase incoming lines, ensure they end in a sentence end mark.
222 file = open(session.markovfile, "r")
223 lines = file.readlines()
226 sentence_end_markers = ".!?)("
228 line = line.lower().replace("\n", "")
229 if line[-1] not in sentence_end_markers:
231 tokens += line.split()
232 if len(tokens) <= select_length:
233 notice("NOT ENOUGH TEXT TO MARKOV.")
236 # Replace URLs with escape string for now, so that the Markov selector
237 # won't see them as different strings. Stash replaced URLs in urls.
240 url_starts = ["http://", "https://", "<http://", "<https://"]
241 for i in range(len(tokens)):
242 for url_start in url_starts:
243 if tokens[i][:len(url_start)] == url_start:
244 length = len(tokens[i])
245 if url_start[0] == "<":
247 length = tokens[i].index(">") + 1
250 urls += [tokens[i][:length]]
251 tokens[i] = url_escape + tokens[i][length:]
254 # For each snippet of select_length, use markov() to find continuation
255 # token from selections. Replace present users' names with malkovich.
256 # Start snippets with the beginning of a sentence, if possible.
257 for i in range(len(tokens) - select_length):
259 for j in range(select_length + 1):
260 token_list += [tokens[i + j]]
261 selections += [token_list]
263 for i in range(select_length):
266 for i in range(len(selections)):
267 if selections[i][0][-1] in sentence_end_markers:
268 for i in range(select_length):
269 snippet[i] = selections[i][i + 1]
272 malkovich = "malkovich"
274 new_end = markov(snippet)
275 for name in session.users_in_chan:
276 if new_end[:len(name)] == name.lower():
277 new_end = malkovich + new_end[len(name):]
279 if len(msg) + len(new_end) > 200:
282 for i in range(select_length - 1):
283 snippet[i] = snippet[i + 1]
284 snippet[select_length - 1] = new_end
286 # Replace occurrences of url escape string with random choice from urls.
288 index = msg.find(url_escape)
291 msg = msg.replace(url_escape, choice(urls), 1)
293 # More meaningful ways to randomly end sentences.
294 notice(msg + malkovich + ".")
299 twtfile = open(session.twtfile, mode)
300 except (PermissionError, FileNotFoundError) as err:
301 notice("CAN'T ACCESS OR CREATE TWT FILE: " + str(err))
305 from datetime import datetime
306 if not os.access(session.twtfile, os.F_OK):
307 twtfile = try_open("w")
311 twtfile = try_open("a")
314 twtfile.write(datetime.utcnow().isoformat() + "\t" + argument + "\n")
318 if "addquote" == command:
320 elif "quote" == command:
322 elif "markov" == command:
324 elif "twt" == command:
328 def handle_url(url, notice, show_url=False):
330 def mobile_twitter_hack(url):
331 re1 = 'https?://(mobile.twitter.com/)[^/]+(/status/)'
332 re2 = 'https?://mobile.twitter.com/([^/]+)/status/([^\?/]+)'
333 m = re.search(re1, url)
334 if m and m.group(1) == 'mobile.twitter.com/' \
335 and m.group(2) == '/status/':
336 m = re.search(re2, url)
337 url = 'https://twitter.com/' + m.group(1) + '/status/' + m.group(2)
338 handle_url(url, notice, True)
342 r = requests.get(url, timeout=5, stream=True)
343 r.raw.decode_content = True
344 text = r.raw.read(10000000+1)
345 if len(text) > 10000000:
346 raise ValueError('Too large a response')
347 except (requests.exceptions.TooManyRedirects,
348 requests.exceptions.ConnectionError,
349 requests.exceptions.InvalidURL,
352 requests.exceptions.InvalidSchema) as error:
353 notice("TROUBLE FOLLOWING URL: " + str(error))
355 if mobile_twitter_hack(url):
357 title = bs4.BeautifulSoup(text, "html5lib").title
358 if title and title.string:
359 prefix = "PAGE TITLE: "
361 prefix = "PAGE TITLE FOR <" + url + ">: "
362 notice(prefix + title.string.strip())
364 notice("PAGE HAS NO TITLE TAG")
369 def __init__(self, io, username, nickname, channel, twtfile, dbdir, rmlogs):
371 self.nickname = nickname
372 self.username = username
373 self.channel = channel
374 self.users_in_chan = []
375 self.twtfile = twtfile
378 self.io.send_line("NICK " + self.nickname)
379 self.io.send_line("USER " + self.username + " 0 * : ")
380 self.io.send_line("JOIN " + self.channel)
381 hash_channel = hashlib.md5(self.channel.encode("utf-8")).hexdigest()
382 self.chandir = self.dbdir + "/" + hash_channel + "/"
383 self.rawlogdir = self.chandir + "raw_logs/"
384 self.logdir = self.chandir + "logs/"
385 if not os.path.exists(self.logdir):
386 os.makedirs(self.logdir)
387 if not os.path.exists(self.rawlogdir):
388 os.makedirs(self.rawlogdir)
389 self.markovfile = self.chandir + "markovfeed"
390 self.quotesfile = self.chandir + "quotes"
395 if type(line) == str:
396 line = Line(":" + self.nickname + "!~" + self.username +
397 "@localhost" + " " + line)
398 now = datetime.datetime.utcnow()
399 form = "%Y-%m-%d %H:%M:%S UTC\t"
400 write_to_file(self.rawlogdir + now.strftime("%Y-%m-%d") + ".txt",
401 "a", now.strftime(form) + " " + line.line + "\n")
402 to_log = irclog.format_logline(line, self.channel)
404 write_to_file(self.logdir + now.strftime("%Y-%m-%d") + ".txt",
405 "a", now.strftime(form) + " " + to_log + "\n")
407 def handle_privmsg(line):
410 line = "NOTICE " + target + " :" + msg
411 self.io.send_line(line)
415 if line.receiver != self.nickname:
416 target = line.receiver
417 msg = str.join(" ", line.tokens[3:])[1:]
418 matches = re.findall("(https?://[^\s>]+)", msg)
419 for i in range(len(matches)):
420 handle_url(matches[i], notice)
422 tokens = msg[1:].split()
423 argument = str.join(" ", tokens[1:])
424 handle_command(tokens[0], argument, notice, target, self)
426 write_to_file(self.markovfile, "a", msg + "\n")
428 now = datetime.datetime.utcnow()
429 write_to_file(self.logdir + now.strftime("%Y-%m-%d") + ".txt", "a",
430 "-----------------------\n")
433 for f in os.listdir(self.logdir):
434 f = os.path.join(self.logdir, f)
435 if os.path.isfile(f) and \
436 os.stat(f).st_mtime < time.time() - self.rmlogs:
438 line = self.io.recv_line()
443 if len(line.tokens) > 1:
444 if line.tokens[0] == "PING":
445 self.io.send_line("PONG " + line.tokens[1])
446 elif line.tokens[1] == "PRIVMSG":
448 elif line.tokens[1] == "353":
449 names = line.tokens[5:]
450 names[0] = names[0][1:]
451 for i in range(len(names)):
452 names[i] = names[i].replace("@", "").replace("+", "")
453 self.users_in_chan += names
454 elif line.tokens[1] == "JOIN" and line.sender != self.nickname:
455 self.users_in_chan += [line.sender]
456 elif line.tokens[1] == "PART":
457 del(self.users_in_chan[self.users_in_chan.index(line.sender)])
458 elif line.tokens[1] == "NICK":
459 del(self.users_in_chan[self.users_in_chan.index(line.sender)])
460 self.users_in_chan += [line.receiver]
463 def parse_command_line_arguments():
464 parser = argparse.ArgumentParser()
465 parser.add_argument("-s, --server", action="store", dest="server",
467 help="server or server net to connect to (default: "
469 parser.add_argument("-p, --port", action="store", dest="port", type=int,
470 default=PORT, help="port to connect to (default : "
472 parser.add_argument("-w, --wait", action="store", dest="timeout",
473 type=int, default=TIMEOUT,
474 help="timeout in seconds after which to attempt "
475 "reconnect (default: " + str(TIMEOUT) + ")")
476 parser.add_argument("-u, --username", action="store", dest="username",
477 default=USERNAME, help="username to use (default: "
479 parser.add_argument("-n, --nickname", action="store", dest="nickname",
480 default=NICKNAME, help="nickname to use (default: "
482 parser.add_argument("-t, --twtxtfile", action="store", dest="twtfile",
483 default=TWTFILE, help="twtxt file to use (default: "
485 parser.add_argument("-d, --dbdir", action="store", dest="dbdir",
486 default=DBDIR, help="directory to store DB files in")
487 parser.add_argument("-r, --rmlogs", action="store", dest="rmlogs",
489 help="maximum age in seconds for logfiles in logs/ "
490 "(0 means: never delete, and is default)")
491 parser.add_argument("CHANNEL", action="store", help="channel to join")
492 opts, unknown = parser.parse_known_args()
496 opts = parse_command_line_arguments()
499 io = IO(opts.server, opts.port, opts.timeout)
500 hash_server = hashlib.md5(opts.server.encode("utf-8")).hexdigest()
501 dbdir = opts.dbdir + "/" + hash_server
502 session = Session(io, opts.username, opts.nickname, opts.CHANNEL,
503 opts.twtfile, dbdir, opts.rmlogs)
505 except ExceptionForRestart: