# NOTE(review): this chunk is a partial extraction — each line carries its
# original file line number, and intermediate lines are missing (e.g. the
# PORT, NICKNAME, TWTFILE, TIMEOUT defaults referenced later are not visible).
# Module-level defaults for the IRC bot; overridable via CLI flags (see
# parse_command_line_arguments below).
18 # Defaults, may be overwritten by command line arguments.
19 SERVER = "irc.freenode.net"
22 USERNAME = "plomlombot"
25 DBDIR = os.path.expanduser("~/plomlombot_db")
# Helper used throughout for appending/creating log, quote, and markov files.
# Body not visible in this extract — presumably opens `path` with `mode`,
# writes `text`, and closes the file (TODO: confirm against full source).
28 def write_to_file(path, mode, text):
# Sentinel exception: raised on connect failure, broken socket, or ping
# timeout (see IO methods below) and caught by the top-level loop so the
# bot tears down and reconnects instead of crashing.
34 class ExceptionForRestart(Exception):
# Parse one raw IRC line into tokens and (in lines missing from this
# extract, orig. 41/43/47-49/53+) apparently derive .sender and .receiver —
# those attributes are used by the session loop below. TODO: confirm.
40 def __init__(self, line):
42 self.tokens = line.split(" ")
# A leading ":" marks an IRC prefix token; scan it for the "!"/"@" runes
# that delimit nick!user@host. The action taken on a match is not visible.
44 if self.tokens[0][0] == ":":
45 for rune in self.tokens[0][1:]:
46 if rune in {"!", "@"}:
# Token 2 (the target field of e.g. PRIVMSG) gets the same rune scan;
# again the body of the match branch is missing here.
50 if len(self.tokens) > 2:
51 for rune in self.tokens[2]:
52 if rune in {"!", "@"}:
# Open a TCP connection to the IRC server and remember its self-reported
# name. `timeout` (seconds) drives the ping/reconnect logic in _pingtest.
60 def __init__(self, server, port, timeout):
61 self.timeout = timeout
62 self.socket = socket.socket()
# Missing lines 63/65 presumably wrap connect() in try/except so a failed
# connect raises ExceptionForRestart — TODO confirm.
64 self.socket.connect((server, port))
66 raise ExceptionForRestart
# Non-blocking mode; reads are multiplexed via select() in
# _recv_line_wrapped.
67 self.socket.setblocking(0)
70 self.last_pong = time.time()
# First server line looks like ":servername ..."; strip ":" to keep the name
# for later PINGs. send_ping=False because we can't PING before knowing it.
71 self.servername = self.recv_line(send_ping=False).split(" ")[0][1:]
# Give up (and trigger a reconnect) if the server has been silent longer
# than self.timeout; otherwise keep the connection alive with a PING.
73 def _pingtest(self, send_ping=True):
74 if self.last_pong + self.timeout < time.time():
75 print("SERVER NOT ANSWERING")
76 raise ExceptionForRestart
# Missing line 77 is presumably `if send_ping:` guarding the PING below,
# matching the send_ping parameter — TODO confirm.
78 self.send_line("PING " + self.servername)
# Send one line to the server: sanitize embedded CR/LF (they would inject
# extra IRC commands), refuse over-long lines (IRC caps messages at 512
# bytes incl. trailing CRLF, hence 510), log, then send until fully written.
80 def send_line(self, msg):
81 msg = msg.replace("\r", " ")
82 msg = msg.replace("\n", " ")
# Length check is on the UTF-8 byte length, not character count.
83 if len(msg.encode("utf-8")) > 510:
84 print("NOT SENT LINE TO SERVER (too long): " + msg)
85 print("LINE TO SERVER: "
86 + str(datetime.datetime.now()) + ": " + msg)
# Missing lines 87-89 presumably append "\r\n" and initialize msg_len /
# total_sent_len — TODO confirm.
90 while total_sent_len < msg_len:
91 sent_len = self.socket.send(bytes(msg[total_sent_len:], "UTF-8"))
# Missing line 92 is presumably `if sent_len == 0:` — a zero-byte send
# signals a dead socket, so restart.
93 print("SOCKET CONNECTION BROKEN")
94 raise ExceptionForRestart
95 total_sent_len += sent_len
# Return the next complete "\r\n"-terminated line from the server, buffering
# partial reads in self.rune_buffer and surplus lines in self.line_buffer.
# May return None when no full line arrived (caller recv_line handles that).
97 def _recv_line_wrapped(self, send_ping=True):
# Serve from the buffer first — one recv() can yield several lines.
98 if len(self.line_buffer) > 0:
99 return self.line_buffer.pop(0)
# Wait at most timeout/2 for readability; the missing branch (orig. 102-104)
# presumably ping-tests and returns None when nothing is ready — TODO confirm.
101 ready = select.select([self.socket], [], [], int(self.timeout / 2))
103 self._pingtest(send_ping)
# Any received data counts as server liveness, resetting the pong clock.
105 self.last_pong = time.time()
106 received_bytes = self.socket.recv(1024)
# Prefer UTF-8; fall back to latin1, which decodes any byte sequence, so
# garbage input can't kill the bot.
108 received_runes = received_bytes.decode("UTF-8")
109 except UnicodeDecodeError:
110 received_runes = received_bytes.decode("latin1")
# recv() returning empty means the peer closed the connection.
111 if len(received_runes) == 0:
112 print("SOCKET CONNECTION BROKEN")
113 raise ExceptionForRestart
114 self.rune_buffer += received_runes
# Last split element is an incomplete line (possibly "") — keep it buffered.
115 lines_split = str.split(self.rune_buffer, "\r\n")
116 self.line_buffer += lines_split[:-1]
117 self.rune_buffer = lines_split[-1]
118 if len(self.line_buffer) > 0:
119 return self.line_buffer.pop(0)
# Public receive: wrap _recv_line_wrapped and log the line. The tail of the
# method (orig. 123, 125+) — likely a None check and the return — is not
# visible in this extract.
121 def recv_line(self, send_ping=True):
122 line = self._recv_line_wrapped(send_ping)
124 print("LINE FROM SERVER " + str(datetime.datetime.now()) + ": " +
# Dispatch a "!command" from chat. `notice` is a callback sending a NOTICE
# back to `target`; `session` supplies per-channel file paths. Large parts
# of the original bodies are missing from this extract; comments below mark
# the visible intent only.
129 def handle_command(command, argument, notice, target, session):
# --- !addquote: append `argument` to the channel's quotes file, creating
# it with a header line on first use.
132 if not os.access(session.quotesfile, os.F_OK):
133 write_to_file(session.quotesfile, "w",
134 "QUOTES FOR " + target + ":\n")
135 write_to_file(session.quotesfile, "a", argument + "\n")
# NOTE(review): file opened without a with-block; close() presumably happens
# on a missing line — TODO confirm.
136 quotesfile = open(session.quotesfile, "r")
137 lines = quotesfile.readlines()
# Quote numbering is 1-based; the header line accounts for the -1.
139 notice("added quote #" + str(len(lines) - 1))
# --- !quote usage help text (sent when the argument doesn't parse).
144 notice("syntax: !quote [int] OR !quote search QUERY")
145 notice("QUERY may be a boolean grouping of quoted or unquoted " +
146 "search terms, examples:")
147 notice("!quote search foo")
148 notice("!quote search foo AND (bar OR NOT baz)")
149 notice("!quote search \"foo\\\"bar\" AND ('NOT\"' AND \"'foo'\"" +
# --- !quote: either a 1-based index, a search query, or random.
155 tokens = argument.split(" ")
# Reject malformed argument shapes (multi-token without "search", or a
# single token that is neither "search" nor a number).
156 if (len(tokens) > 1 and tokens[0] != "search") or \
157 (len(tokens) == 1 and
158 (tokens[0] == "search" or not tokens[0].isdigit())):
161 if not os.access(session.quotesfile, os.F_OK):
162 notice("no quotes available")
164 quotesfile = open(session.quotesfile, "r")
165 lines = quotesfile.readlines()
# Index 0 is the header line, so valid quotes are 1..len(lines)-1;
# the bounds check below appears to guard that — TODO confirm off-by-one
# handling on missing lines.
170 if i == 0 or i > len(lines):
171 notice("there's no quote of that index")
# Search mode: delegate boolean-query parsing to plomsearch.
174 elif len(tokens) > 1:
175 query = str.join(" ", tokens[1:])
177 results = plomsearch.search(query, lines)
178 except plomsearch.LogicParserError as err:
179 notice("failed query parsing: " + str(err))
181 if len(results) == 0:
182 notice("no quotes matching query")
# Cap output at 3 quotes to avoid flooding the channel.
185 notice("showing 3 of " + str(len(results)) + " quotes")
186 for result in results[:3]:
187 notice("quote #" + str(result[0] + 1) + ": "
# Random mode: pick any line index; [:-1] strips the trailing newline.
191 i = random.randrange(len(lines))
192 notice("quote #" + str(i + 1) + ": " + lines[i][:-1])
# --- !markov: generate a pseudo-sentence from the channel's markovfeed.
195 from random import choice, shuffle
# markov(): choose the next token given the current `snippet` of
# select_length tokens, preferring selections whose tail overlaps the
# snippet as much as possible (longest suffix match first).
200 usable_selections = []
201 for i in range(select_length, 0, -1):
202 for selection in selections:
206 if snippet[-j] != selection[-(j+1)]:
210 usable_selections += [selection]
211 if [] != usable_selections:
# Fall back to the full selection pool when nothing overlaps.
213 if [] == usable_selections:
214 usable_selections = selections
215 selection = choice(usable_selections)
216 return selection[select_length]
218 if not os.access(session.markovfile, os.F_OK):
219 notice("not enough text to markov")
222 # Lowercase incoming lines, ensure they end in a sentence end mark.
223 file = open(session.markovfile, "r")
224 lines = file.readlines()
# "(" in the end-marker set looks odd — presumably intentional for this
# corpus, but worth confirming.
227 sentence_end_markers = ".!?)("
229 line = line.lower().replace("\n", "")
230 line[-1] not in sentence_end_markers
232 tokens += line.split()
233 if len(tokens) <= select_length:
234 notice("not enough text to markov")
237 # Replace URLs with escape string for now, so that the Markov selector
238 # won't see them as different strings. Stash replaced URLs in urls.
241 url_starts = ["http://", "https://", "<http://", "<https://"]
242 for i in range(len(tokens)):
243 for url_start in url_starts:
244 if tokens[i][:len(url_start)] == url_start:
245 length = len(tokens[i])
# "<"-wrapped URLs end at the closing ">"; keep any trailing text.
246 if url_start[0] == "<":
248 length = tokens[i].index(">") + 1
251 urls += [tokens[i][:length]]
252 tokens[i] = url_escape + tokens[i][length:]
255 # For each snippet of select_length, use markov() to find continuation
256 # token from selections. Replace present users' names with malkovich.
257 # Start snippets with the beginning of a sentence, if possible.
258 for i in range(len(tokens) - select_length):
260 for j in range(select_length + 1):
261 token_list += [tokens[i + j]]
262 selections += [token_list]
264 for i in range(select_length):
# Prefer seeding the snippet right after a sentence end.
# NOTE(review): the inner loop reuses index variable `i` from the outer
# loop — harmless only if the original code really shadows it; confirm.
267 for i in range(len(selections)):
268 if selections[i][0][-1] in sentence_end_markers:
269 for i in range(select_length):
270 snippet[i] = selections[i][i + 1]
273 malkovich = "malkovich"
275 new_end = markov(snippet)
# Replace any present user's nick at the start of the token so the bot
# doesn't highlight people; matched case-insensitively via lower().
276 for name in session.users_in_chan:
277 if new_end[:len(name)] == name.lower():
278 new_end = malkovich + new_end[len(name):]
# Cap the output message length (IRC line limits / channel politeness).
280 if len(msg) + len(new_end) > 200:
# Slide the snippet window one token forward.
283 for i in range(select_length - 1):
284 snippet[i] = snippet[i + 1]
285 snippet[select_length - 1] = new_end
287 # Replace occurences of url escape string with random choice from urls.
289 index = msg.find(url_escape)
292 msg = msg.replace(url_escape, choice(urls), 1)
294 # More meaningful ways to randomly end sentences.
295 notice(msg + malkovich + ".")
# --- !twt: append a twtxt entry; try_open reports file problems via notice.
300 twtfile = open(session.twtfile, mode)
301 except (PermissionError, FileNotFoundError) as err:
302 notice("can't access or create twt file: " + str(err))
306 from datetime import datetime
307 if not os.access(session.twtfile, os.F_OK):
308 twtfile = try_open("w")
312 twtfile = try_open("a")
# twtxt format: ISO-8601 UTC timestamp, TAB, text.
315 twtfile.write(datetime.utcnow().isoformat() + "\t" + argument + "\n")
# --- dispatch table for the commands defined above.
319 if "addquote" == command:
321 elif "quote" == command:
323 elif "markov" == command:
325 elif "twt" == command:
# Fetch `url` and report its HTML <title> via `notice`. show_url=True makes
# the report include the URL itself (used for the twitter-redirect case).
329 def handle_url(url, notice, show_url=False):
# Rewrite mobile.twitter.com status links to twitter.com and re-handle;
# mobile pages presumably lack a usable title — TODO confirm.
331 def mobile_twitter_hack(url):
332 re1 = 'https?://(mobile.twitter.com/)[^/]+(/status/)'
333 re2 = 'https?://mobile.twitter.com/([^/]+)/status/([^\?/]+)'
334 m = re.search(re1, url)
335 if m and m.group(1) == 'mobile.twitter.com/' \
336 and m.group(2) == '/status/':
337 m = re.search(re2, url)
338 url = 'https://twitter.com/' + m.group(1) + '/status/' + m.group(2)
339 handle_url(url, notice, True)
# SIGALRM-based timeout guard around the fetch (missing lines presumably
# set/clear the alarm) — TODO confirm.
342 class TimeOut(Exception):
345 def timeout_handler(ignore1, ignore2):
346 raise TimeOut("timeout")
348 signal.signal(signal.SIGALRM, timeout_handler)
351 r = requests.get(url, headers = {'User-Agent': 'plomlombot'}, stream=True)
352 r.raw.decode_content = True
# Cap the download at 10 MB (+1 byte to detect overflow).
353 text = r.raw.read(10000000+1)
354 if len(text) > 10000000:
355 raise ValueError('Too large a response')
356 except (requests.exceptions.TooManyRedirects,
357 requests.exceptions.ConnectionError,
358 requests.exceptions.InvalidURL,
362 requests.exceptions.InvalidSchema) as error:
364 notice("trouble following url: " + str(error))
# If the twitter hack re-handled the URL, stop here (return on missing line).
367 if mobile_twitter_hack(url):
369 title = bs4.BeautifulSoup(text, "html5lib").title
370 if title and title.string:
371 prefix = "page title: "
373 prefix = "page title for <" + url + ">: "
374 notice(prefix + title.string.strip())
376 notice("page has no title tag")
# Per-connection session state: identify to the server, join the channel,
# and set up the channel's on-disk DB directory (keyed by an MD5 hash of
# the channel name, so arbitrary channel names map to safe path components).
382 def __init__(self, io, username, nickname, channel, twtfile, dbdir, rmlogs):
384 self.nickname = nickname
385 self.username = username
386 self.channel = channel
387 self.users_in_chan = []
388 self.twtfile = twtfile
# NOTE(review): self.io / self.dbdir / self.rmlogs assignments are on
# missing lines (orig. 383/389/390) — they are used below and elsewhere.
391 self.io.send_line("NICK " + self.nickname)
392 self.io.send_line("USER " + self.username + " 0 * : ")
393 self.io.send_line("JOIN " + self.channel)
# MD5 here is a filename hash, not a security measure.
394 hash_channel = hashlib.md5(self.channel.encode("utf-8")).hexdigest()
395 self.chandir = self.dbdir + "/" + hash_channel + "/"
396 self.rawlogdir = self.chandir + "raw_logs/"
397 self.logdir = self.chandir + "logs/"
398 if not os.path.exists(self.logdir):
399 os.makedirs(self.logdir)
400 if not os.path.exists(self.rawlogdir):
401 os.makedirs(self.rawlogdir)
402 self.markovfile = self.chandir + "markovfeed"
403 self.quotesfile = self.chandir + "quotes"
# Fragment of a logging method (its def line is on a missing line, orig.
# ~405-407): writes each line to a per-day raw log and a formatted log.
# A plain string (a line the bot itself sent) is wrapped in a synthetic
# Line with the bot as sender so both directions log uniformly.
408 if type(line) == str:
409 line = Line(":" + self.nickname + "!~" + self.username +
410 "@localhost" + " " + line)
411 now = datetime.datetime.utcnow()
412 form = "%Y-%m-%d %H:%M:%S UTC\t"
# Raw log: the untouched IRC line, one file per UTC day.
413 write_to_file(self.rawlogdir + now.strftime("%Y-%m-%d") + ".txt",
414 "a", now.strftime(form) + " " + line.line + "\n")
# Formatted log via irclog; the None/skip check is on a missing line.
415 to_log = irclog.format_logline(line, self.channel)
417 write_to_file(self.logdir + now.strftime("%Y-%m-%d") + ".txt",
418 "a", now.strftime(form) + " " + to_log + "\n")
# Fragment of the session's main loop: a nested PRIVMSG handler plus the
# receive/dispatch loop. The enclosing method's def line is not visible.
420 def handle_privmsg(line):
# notice(): reply helper — sends a NOTICE to the current target.
423 line = "NOTICE " + target + " :" + msg
424 self.io.send_line(line)
# Messages addressed to the bot's own nick are private; otherwise reply
# in the channel the message came from.
428 if line.receiver != self.nickname:
429 target = line.receiver
# Message text: everything after the 3rd token, minus the leading ":".
430 msg = str.join(" ", line.tokens[3:])[1:]
# Auto-title any URLs in the message, up to a cap announced below.
431 matches = re.findall("(https?://[^\s>]+)", msg)
433 for i in range(len(matches)):
434 if handle_url(matches[i], notice):
437 notice("maximum number of urls to parse per message "
# Messages starting with "!" (check on a missing line, presumably) are
# commands: first token is the command, rest the argument.
441 tokens = msg[1:].split()
442 argument = str.join(" ", tokens[1:])
443 handle_command(tokens[0], argument, notice, target, self)
# Everything else feeds the markov corpus.
445 write_to_file(self.markovfile, "a", msg + "\n")
# Day-rollover separator in the formatted log (rollover check missing).
447 now = datetime.datetime.utcnow()
448 write_to_file(self.logdir + now.strftime("%Y-%m-%d") + ".txt", "a",
449 "-----------------------\n")
# Prune formatted logs older than rmlogs seconds (deletion call is on a
# missing line, orig. 456 — presumably os.remove(f)).
452 for f in os.listdir(self.logdir):
453 f = os.path.join(self.logdir, f)
454 if os.path.isfile(f) and \
455 os.stat(f).st_mtime < time.time() - self.rmlogs:
457 line = self.io.recv_line()
# Dispatch by IRC verb; maintain users_in_chan from 353 (NAMES reply),
# JOIN, PART, and NICK messages.
462 if len(line.tokens) > 1:
463 if line.tokens[0] == "PING":
464 self.io.send_line("PONG " + line.tokens[1])
465 elif line.tokens[1] == "PRIVMSG":
467 elif line.tokens[1] == "353":
# NAMES list starts at token 5; first name carries a leading ":".
468 names = line.tokens[5:]
469 names[0] = names[0][1:]
# Strip op/voice sigils so names match plain nicks.
470 for i in range(len(names)):
471 names[i] = names[i].replace("@", "").replace("+", "")
472 self.users_in_chan += names
473 elif line.tokens[1] == "JOIN" and line.sender != self.nickname:
474 self.users_in_chan += [line.sender]
475 elif line.tokens[1] == "PART":
476 del(self.users_in_chan[self.users_in_chan.index(line.sender)])
477 elif line.tokens[1] == "NICK":
478 del(self.users_in_chan[self.users_in_chan.index(line.sender)])
479 self.users_in_chan += [line.receiver]
# Build and parse the CLI. NOTE(review): names like "-s, --server" register
# a single odd-looking option string, not two aliases — argparse wants
# separate "-s", "--server" arguments; presumably a long-standing quirk of
# this file. Missing lines hold several default=/help= continuations and
# the return (parse_known_args tolerates unknown args).
482 def parse_command_line_arguments():
483 parser = argparse.ArgumentParser()
484 parser.add_argument("-s, --server", action="store", dest="server",
486 help="server or server net to connect to (default: "
488 parser.add_argument("-p, --port", action="store", dest="port", type=int,
489 default=PORT, help="port to connect to (default : "
491 parser.add_argument("-w, --wait", action="store", dest="timeout",
492 type=int, default=TIMEOUT,
493 help="timeout in seconds after which to attempt "
494 "reconnect (default: " + str(TIMEOUT) + ")")
495 parser.add_argument("-u, --username", action="store", dest="username",
496 default=USERNAME, help="username to use (default: "
498 parser.add_argument("-n, --nickname", action="store", dest="nickname",
499 default=NICKNAME, help="nickname to use (default: "
501 parser.add_argument("-t, --twtxtfile", action="store", dest="twtfile",
502 default=TWTFILE, help="twtxt file to use (default: "
504 parser.add_argument("-d, --dbdir", action="store", dest="dbdir",
505 default=DBDIR, help="directory to store DB files in")
506 parser.add_argument("-r, --rmlogs", action="store", dest="rmlogs",
508 help="maximum age in seconds for logfiles in logs/ "
509 "(0 means: never delete, and is default)")
510 parser.add_argument("CHANNEL", action="store", help="channel to join")
511 opts, unknown = parser.parse_known_args()
# Top-level entry fragment: parse args, then (inside a loop whose `while`
# and `try` lines are missing) connect, run a Session, and on
# ExceptionForRestart tear down and reconnect. Per-server DB namespace is
# an MD5 hash of the server name (filename hashing, not security).
515 opts = parse_command_line_arguments()
518 io = IO(opts.server, opts.port, opts.timeout)
519 hash_server = hashlib.md5(opts.server.encode("utf-8")).hexdigest()
520 dbdir = opts.dbdir + "/" + hash_server
521 session = Session(io, opts.username, opts.nickname, opts.CHANNEL,
522 opts.twtfile, dbdir, opts.rmlogs)
524 except ExceptionForRestart: